// Copyright 2018 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// An open-addressing hashtable with quadratic probing.
//
// This is a low-level hashtable on top of which different interfaces can be
// implemented, like flat_hash_set, node_hash_set, string_hash_set, etc.
//
// The table interface is similar to that of std::unordered_set. Notable
// differences are that most member functions support heterogeneous keys when
// BOTH the hash and eq functions are marked as transparent. They do so by
// providing a typedef called `is_transparent`.
//
// When heterogeneous lookup is enabled, functions that take key_type act as if
// they have an overload set like:
//
//   iterator find(const key_type& key);
//   template <class K>
//   iterator find(const K& key);
//
//   size_type erase(const key_type& key);
//   template <class K>
//   size_type erase(const K& key);
//
//   std::pair<iterator, iterator> equal_range(const key_type& key);
//   template <class K>
//   std::pair<iterator, iterator> equal_range(const K& key);
//
// When heterogeneous lookup is disabled, only the explicit `key_type` overloads
// exist.
//
// In addition, the pointer-to-element and iterator stability guarantees are
// weaker: all iterators and pointers are invalidated after a new element is
// inserted.
//
// IMPLEMENTATION DETAILS
//
// # Table Layout
//
// A raw_hash_set's backing array consists of control bytes followed by slots
// that may or may not contain objects.
//
// The layout of the backing array, for `capacity` slots, is thus, as a
// pseudo-struct:
//
//   struct BackingArray {
//     // Sampling handler. This field isn't present when the sampling is
//     // disabled or this allocation hasn't been selected for sampling.
//     HashtablezInfoHandle infoz_;
//     // The number of elements we can insert before growing the capacity.
//     size_t growth_left;
//     // Control bytes for the "real" slots.
//     ctrl_t ctrl[capacity];
//     // Always `ctrl_t::kSentinel`. This is used by iterators to find when to
//     // stop and serves no other purpose.
//     ctrl_t sentinel;
//     // A copy of the first `kWidth - 1` elements of `ctrl`. This is used so
//     // that if a probe sequence picks a value near the end of `ctrl`,
//     // `Group` will have valid control bytes to look at.
//     ctrl_t clones[kWidth - 1];
//     // The actual slot data.
//     slot_type slots[capacity];
//   };
//
// The length of this array is computed by `RawHashSetLayout::alloc_size` below.
//
// Control bytes (`ctrl_t`) are bytes (collected into groups of a
// platform-specific size) that define the state of the corresponding slot in
// the slot array. Group manipulation is tightly optimized to be as efficient
// as possible: SSE and friends on x86, clever bit operations on other arches.
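//
// As a rough, portable illustration of the group-scanning idea (a sketch
// only: the real `Group` type uses SSE/NEON/bit tricks, and `MatchH2` below
// is a hypothetical name, not the actual API), finding candidate slots whose
// control byte equals a 7-bit H2 value can be pictured as:
//
//   #include <cstddef>
//   #include <cstdint>
//
//   // Returns a bitmask with bit i set iff the i-th control byte in the
//   // group equals `h2`. Special bytes (empty/deleted/sentinel) have their
//   // high bit set, so they can never equal a 7-bit H2 value.
//   uint32_t MatchH2(const uint8_t* group, size_t width, uint8_t h2) {
//     uint32_t mask = 0;
//     for (size_t i = 0; i < width; ++i) {
//       if (group[i] == h2) mask |= uint32_t{1} << i;
//     }
//     return mask;
//   }
//
// The SIMD implementations compute the same mask for the whole group at once,
// so candidates can be enumerated with a handful of bit operations instead of
// a per-slot branch. Groups tile the control byte array as shown below: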
//
//      Group 1         Group 2        Group 3
// +---------------+---------------+---------------+
// | | | | | | | | | | | | | | | | | | | | | | | | |
// +---------------+---------------+---------------+
//
// Each control byte is either a special value (marking an empty slot, a
// deleted slot, sometimes called a *tombstone*, or the end-of-table marker
// used by iterators) or, if the slot is occupied, seven bits (H2) from the
// hash of the value in the corresponding slot.
//
// Storing control bytes in a separate array also has beneficial cache effects,
// since more logical slots will fit into a cache line.
//
// # Small Object Optimization (SOO)
//
// When the size/alignment of the value_type and the capacity of the table are
// small, we enable small object optimization and store the values inline in
// the raw_hash_set object. This optimization allows us to avoid
// allocation/deallocation as well as cache/dTLB misses.
//
// # Hashing
//
// We compute two separate hashes, `H1` and `H2`, from the hash of an object.
// `H1(hash(x))` is an index into `slots`, and essentially the starting point
// for the probe sequence. `H2(hash(x))` is a 7-bit value used to filter out
// objects that cannot possibly be the one we are looking for.
//
// # Table Operations
//
// The key operations are `insert`, `find`, and `erase`.
//
// Since `insert` and `erase` are implemented in terms of `find`, we describe
// `find` first. To `find` a value `x`, we compute `hash(x)`. From
// `H1(hash(x))` and the capacity, we construct a `probe_seq` that visits every
// group of slots in some interesting order.
//
// We now walk through these indices. At each index, we select the entire group
// starting with that index and extract potential candidates: occupied slots
// with a control byte equal to `H2(hash(x))`. If we find an empty slot in the
// group, we stop: `x` is not in the table. Each candidate slot `y` is compared
// with `x`; if `x == y`, we are done and return `&y`; otherwise we continue to
// the next probe index. Tombstones effectively behave like full slots that
// never match the value we're looking for.
//
// The `H2` bits ensure that when we compare a slot to an object with `==`, we
// are likely to have actually found the object. That is, the chance is low
// that `==` is called and returns `false`. Thus, when we search for an object,
// we are unlikely to call `==` many times. This likelihood can be analyzed as
// follows (assuming that H2 is a random enough hash function).
//
// Let's assume that there are `k` "wrong" objects that must be examined in a
// probe sequence. For example, when doing a `find` on an object that is in the
// table, `k` is the number of objects between the start of the probe sequence
// and the final found object (not including the final found object). The
// expected number of objects with an H2 match is then `k/128`. Measurements
// and analysis indicate that even at high load factors, `k` is less than 32,
// meaning that the number of "false positive" comparisons we must perform is
// less than 1/8 per `find`.
//
// `insert` is implemented in terms of `unchecked_insert`, which inserts a
// value presumed not to be in the table (violating this requirement will cause
// the table to behave erratically). Given `x` and its hash `hash(x)`, to insert
// it, we construct a `probe_seq` once again, and use it to find the first
// group with an unoccupied (empty *or* deleted) slot. We place `x` into the
// first such slot in the group and mark it as full with `x`'s H2.
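//
// To make the walk-through above concrete, here is a simplified sketch of the
// `find` loop. It elides the real template machinery and uses illustrative
// names (`FindSlot`, `slot_equals`, `kNotFound` are not the actual API); the
// real lookup code appears further below in `raw_hash_set`.
//
//   // Returns the index of the slot containing `x`, or kNotFound.
//   size_t FindSlot(size_t hash, const T& x) {
//     probe_seq<Group::kWidth> seq(H1(hash, seed()), capacity());
//     while (true) {
//       Group g(control() + seq.offset());
//       // Candidate slots: occupied control bytes equal to H2(hash).
//       for (uint32_t i : g.Match(H2(hash))) {
//         size_t index = seq.offset(i);
//         if (slot_equals(index, x)) return index;  // found it
//       }
//       // An empty byte in the group means `x` cannot appear later in the
//       // probe sequence, so it is not in the table.
//       if (g.MaskEmpty()) return kNotFound;
//       seq.next();  // advance to the next group of the quadratic sequence
//     }
//   }
//
// `unchecked_insert` performs the same kind of walk, but instead of matching
// H2 it looks for the first group containing an empty or deleted byte and
// claims a slot there (see `find_first_non_full()` below).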
// // To `insert`, we compose `unchecked_insert` with `find`. We compute `h(x)` and // perform a `find` to see if it's already present; if it is, we're done. If // it's not, we may decide the table is getting overcrowded (i.e. the load // factor is greater than 7/8 for big tables; `is_small()` tables use a max load // factor of 1); in this case, we allocate a bigger array, `unchecked_insert` // each element of the table into the new array (we know that no insertion here // will insert an already-present value), and discard the old backing array. At // this point, we may `unchecked_insert` the value `x`. // // Below, `unchecked_insert` is partly implemented by `prepare_insert`, which // presents a viable, initialized slot pointee to the caller. // // `erase` is implemented in terms of `erase_at`, which takes an index to a // slot. Given an offset, we simply create a tombstone and destroy its contents. // If we can prove that the slot would not appear in a probe sequence, we can // make the slot as empty, instead. We can prove this by observing that if a // group has any empty slots, it has never been full (assuming we never create // an empty slot in a group with no empties, which this heuristic guarantees we // never do) and find would stop at this group anyways (since it does not probe // beyond groups with empties). // // `erase` is `erase_at` composed with `find`: if we // have a value `x`, we can perform a `find`, and then `erase_at` the resulting // slot. // // To iterate, we simply traverse the array, skipping empty and deleted slots // and stopping when we hit a `kSentinel`. #ifndef ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ #define ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/base/internal/endian.h" #include "absl/base/internal/iterator_traits.h" #include "absl/base/internal/raw_logging.h" #include "absl/base/macros.h" #include "absl/base/optimization.h" #include "absl/base/options.h" #include "absl/base/port.h" #include "absl/base/prefetch.h" #include "absl/container/internal/common.h" // IWYU pragma: export // for node_handle #include "absl/container/internal/common_policy_traits.h" #include "absl/container/internal/compressed_tuple.h" #include "absl/container/internal/container_memory.h" #include "absl/container/internal/hash_function_defaults.h" #include "absl/container/internal/hash_policy_traits.h" #include "absl/container/internal/hashtable_control_bytes.h" #include "absl/container/internal/hashtable_debug_hooks.h" #include "absl/container/internal/hashtablez_sampler.h" #include "absl/functional/function_ref.h" #include "absl/hash/hash.h" #include "absl/memory/memory.h" #include "absl/meta/type_traits.h" #include "absl/numeric/bits.h" #include "absl/utility/utility.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace container_internal { #ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS #error ABSL_SWISSTABLE_ENABLE_GENERATIONS cannot be directly set #elif (defined(ABSL_HAVE_ADDRESS_SANITIZER) || \ defined(ABSL_HAVE_HWADDRESS_SANITIZER) || \ defined(ABSL_HAVE_MEMORY_SANITIZER)) && \ !defined(NDEBUG_SANITIZER) // If defined, performance is important. // When compiled in sanitizer mode, we add generation integers to the backing // array and iterators. In the backing array, we store the generation between // the control bytes and the slots. 
When iterators are dereferenced, we assert // that the container has not been mutated in a way that could cause iterator // invalidation since the iterator was initialized. #define ABSL_SWISSTABLE_ENABLE_GENERATIONS #endif #ifdef ABSL_SWISSTABLE_ASSERT #error ABSL_SWISSTABLE_ASSERT cannot be directly set #else // We use this macro for assertions that users may see when the table is in an // invalid state that sanitizers may help diagnose. #define ABSL_SWISSTABLE_ASSERT(CONDITION) \ assert((CONDITION) && "Try enabling sanitizers.") #endif // We use uint8_t so we don't need to worry about padding. using GenerationType = uint8_t; // A sentinel value for empty generations. Using 0 makes it easy to constexpr // initialize an array of this value. constexpr GenerationType SentinelEmptyGeneration() { return 0; } constexpr GenerationType NextGeneration(GenerationType generation) { return ++generation == SentinelEmptyGeneration() ? ++generation : generation; } #ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS constexpr bool SwisstableGenerationsEnabled() { return true; } constexpr size_t NumGenerationBytes() { return sizeof(GenerationType); } #else constexpr bool SwisstableGenerationsEnabled() { return false; } constexpr size_t NumGenerationBytes() { return 0; } #endif // Returns true if we should assert that the table is not accessed after it has // been destroyed or during the destruction of the table. constexpr bool SwisstableAssertAccessToDestroyedTable() { #ifndef NDEBUG return true; #endif return SwisstableGenerationsEnabled(); } template void SwapAlloc(AllocType& lhs, AllocType& rhs, std::true_type /* propagate_on_container_swap */) { using std::swap; swap(lhs, rhs); } template void SwapAlloc(AllocType& lhs, AllocType& rhs, std::false_type /* propagate_on_container_swap */) { (void)lhs; (void)rhs; assert(lhs == rhs && "It's UB to call swap with unequal non-propagating allocators."); } template void CopyAlloc(AllocType& lhs, AllocType& rhs, std::true_type /* propagate_alloc */) { lhs = rhs; } template void CopyAlloc(AllocType&, AllocType&, std::false_type /* propagate_alloc */) {} // The state for a probe sequence. // // Currently, the sequence is a triangular progression of the form // // p(i) := Width * (i^2 + i)/2 + hash (mod mask + 1) // // The use of `Width` ensures that each probe step does not overlap groups; // the sequence effectively outputs the addresses of *groups* (although not // necessarily aligned to any boundary). The `Group` machinery allows us // to check an entire group with minimal branching. // // Wrapping around at `mask + 1` is important, but not for the obvious reason. // As described above, the first few entries of the control byte array // are mirrored at the end of the array, which `Group` will find and use // for selecting candidates. However, when those candidates' slots are // actually inspected, there are no corresponding slots for the cloned bytes, // so we need to make sure we've treated those offsets as "wrapping around". // // It turns out that this probe sequence visits every group exactly once if the // number of groups is a power of two, since (i^2+i)/2 is a bijection in // Z/(2^m). See https://en.wikipedia.org/wiki/Quadratic_probing template class probe_seq { public: // Creates a new probe sequence using `hash` as the initial value of the // sequence and `mask` (usually the capacity of the table) as the mask to // apply to each value in the progression. 
probe_seq(size_t hash, size_t mask) { ABSL_SWISSTABLE_ASSERT(((mask + 1) & mask) == 0 && "not a mask"); mask_ = mask; offset_ = hash & mask_; } // The offset within the table, i.e., the value `p(i)` above. size_t offset() const { return offset_; } size_t offset(size_t i) const { return (offset_ + i) & mask_; } void next() { index_ += Width; offset_ += index_; offset_ &= mask_; } // 0-based probe index, a multiple of `Width`. size_t index() const { return index_; } private: size_t mask_; size_t offset_; size_t index_ = 0; }; template struct RequireUsableKey { template std::pair< decltype(std::declval()(std::declval())), decltype(std::declval()(std::declval(), std::declval()))>* operator()(const PassedKey&, const Args&...) const; }; template struct IsDecomposable : std::false_type {}; template struct IsDecomposable< absl::void_t(), std::declval()...))>, Policy, Hash, Eq, Ts...> : std::true_type {}; // TODO(alkis): Switch to std::is_nothrow_swappable when gcc/clang supports it. template constexpr bool IsNoThrowSwappable(std::true_type = {} /* is_swappable */) { using std::swap; return noexcept(swap(std::declval(), std::declval())); } template constexpr bool IsNoThrowSwappable(std::false_type /* is_swappable */) { return false; } // See definition comment for why this is size 32. ABSL_DLL extern const ctrl_t kEmptyGroup[32]; // We use these sentinel capacity values in debug mode to indicate different // classes of bugs. enum InvalidCapacity : size_t { kAboveMaxValidCapacity = ~size_t{} - 100, kReentrance, kDestroyed, // These two must be last because we use `>= kMovedFrom` to mean moved-from. kMovedFrom, kSelfMovedFrom, }; // Returns a pointer to a control byte group that can be used by empty tables. inline ctrl_t* EmptyGroup() { // Const must be cast away here; no uses of this function will actually write // to it because it is only used for empty tables. return const_cast(kEmptyGroup + 16); } // For use in SOO iterators. // TODO(b/289225379): we could potentially get rid of this by adding an is_soo // bit in iterators. This would add branches but reduce cache misses. ABSL_DLL extern const ctrl_t kSooControl[17]; // Returns a pointer to a full byte followed by a sentinel byte. inline ctrl_t* SooControl() { // Const must be cast away here; no uses of this function will actually write // to it because it is only used for SOO iterators. return const_cast(kSooControl); } // Whether ctrl is from the SooControl array. inline bool IsSooControl(const ctrl_t* ctrl) { return ctrl == SooControl(); } // Returns a pointer to a generation to use for an empty hashtable. GenerationType* EmptyGeneration(); // Returns whether `generation` is a generation for an empty hashtable that // could be returned by EmptyGeneration(). inline bool IsEmptyGeneration(const GenerationType* generation) { return *generation == SentinelEmptyGeneration(); } // We only allow a maximum of 1 SOO element, which makes the implementation // much simpler. Complications with multiple SOO elements include: // - Satisfying the guarantee that erasing one element doesn't invalidate // iterators to other elements means we would probably need actual SOO // control bytes. // - In order to prevent user code from depending on iteration order for small // tables, we would need to randomize the iteration order somehow. constexpr size_t SooCapacity() { return 1; } // Sentinel type to indicate SOO CommonFields construction. struct soo_tag_t {}; // Sentinel type to indicate SOO CommonFields construction with full size. 
struct full_soo_tag_t {}; // Sentinel type to indicate non-SOO CommonFields construction. struct non_soo_tag_t {}; // Sentinel value to indicate an uninitialized value explicitly. struct uninitialized_tag_t {}; // Sentinel value to indicate creation of an empty table without a seed. struct no_seed_empty_tag_t {}; // Per table hash salt. This gets mixed into H1 to randomize iteration order // per-table. // The seed is needed to ensure non-determinism of iteration order. class PerTableSeed { public: // The number of bits in the seed. // It is big enough to ensure non-determinism of iteration order. // We store the seed inside a uint64_t together with size and other metadata. // Using 16 bits allows us to save one `and` instruction in H1 (we use movzwl // instead of movq+and). static constexpr size_t kBitCount = 16; // Returns the seed for the table. Only the lowest kBitCount are non zero. size_t seed() const { return seed_; } private: friend class HashtableSize; explicit PerTableSeed(size_t seed) : seed_(seed) {} const size_t seed_; }; // Returns next seed base number that is used for generating per-table seeds. // Only the lowest PerTableSeed::kBitCount bits are used for actual hash table // seed. inline size_t NextSeedBaseNumber() { thread_local size_t seed = reinterpret_cast(&seed); if constexpr (sizeof(size_t) == 4) { seed += uint32_t{0xcc9e2d51}; } else { seed += uint64_t{0xdcb22ca68cb134ed}; } return seed; } // The size and also has additionally // 1) one bit that stores whether we have infoz. // 2) PerTableSeed::kBitCount bits for the seed. class HashtableSize { public: static constexpr size_t kSizeBitCount = 64 - PerTableSeed::kBitCount - 1; explicit HashtableSize(uninitialized_tag_t) {} explicit HashtableSize(no_seed_empty_tag_t) : data_(0) {} explicit HashtableSize(full_soo_tag_t) : data_(kSizeOneNoMetadata) {} // Returns actual size of the table. size_t size() const { return static_cast(data_ >> kSizeShift); } void increment_size() { data_ += kSizeOneNoMetadata; } void increment_size(size_t size) { data_ += static_cast(size) * kSizeOneNoMetadata; } void decrement_size() { data_ -= kSizeOneNoMetadata; } // Returns true if the table is empty. bool empty() const { return data_ < kSizeOneNoMetadata; } // Sets the size to zero, but keeps all the metadata bits. void set_size_to_zero_keep_metadata() { data_ = data_ & kMetadataMask; } PerTableSeed seed() const { return PerTableSeed(static_cast(data_) & kSeedMask); } void generate_new_seed() { size_t seed = NextSeedBaseNumber(); data_ = (data_ & ~kSeedMask) | (seed & kSeedMask); } // Returns true if the table has infoz. bool has_infoz() const { return ABSL_PREDICT_FALSE((data_ & kHasInfozMask) != 0); } // Sets the has_infoz bit. void set_has_infoz() { data_ |= kHasInfozMask; } private: static constexpr size_t kSizeShift = 64 - kSizeBitCount; static constexpr uint64_t kSizeOneNoMetadata = uint64_t{1} << kSizeShift; static constexpr uint64_t kMetadataMask = kSizeOneNoMetadata - 1; static constexpr uint64_t kSeedMask = (uint64_t{1} << PerTableSeed::kBitCount) - 1; // The next bit after the seed. static constexpr uint64_t kHasInfozMask = kSeedMask + 1; uint64_t data_; }; // Extracts the H1 portion of a hash: 57 bits mixed with a per-table seed. inline size_t H1(size_t hash, PerTableSeed seed) { return (hash >> 7) ^ seed.seed(); } // Extracts the H2 portion of a hash: the 7 bits not used for H1. // // These are used as an occupied control byte. 
inline h2_t H2(size_t hash) { return hash & 0x7F; } // Mixes a randomly generated per-process seed with `hash` and `ctrl` to // randomize insertion order within groups. bool ShouldInsertBackwardsForDebug(size_t capacity, size_t hash, PerTableSeed seed); ABSL_ATTRIBUTE_ALWAYS_INLINE inline bool ShouldInsertBackwards( ABSL_ATTRIBUTE_UNUSED size_t capacity, ABSL_ATTRIBUTE_UNUSED size_t hash, ABSL_ATTRIBUTE_UNUSED PerTableSeed seed) { #if defined(NDEBUG) return false; #else return ShouldInsertBackwardsForDebug(capacity, hash, seed); #endif } // Returns insert position for the given mask. // We want to add entropy even when ASLR is not enabled. // In debug build we will randomly insert in either the front or back of // the group. // TODO(kfm,sbenza): revisit after we do unconditional mixing template ABSL_ATTRIBUTE_ALWAYS_INLINE inline auto GetInsertionOffset( Mask mask, ABSL_ATTRIBUTE_UNUSED size_t capacity, ABSL_ATTRIBUTE_UNUSED size_t hash, ABSL_ATTRIBUTE_UNUSED PerTableSeed seed) { #if defined(NDEBUG) return mask.LowestBitSet(); #else return ShouldInsertBackwardsForDebug(capacity, hash, seed) ? mask.HighestBitSet() : mask.LowestBitSet(); #endif } // When there is an insertion with no reserved growth, we rehash with // probability `min(1, RehashProbabilityConstant() / capacity())`. Using a // constant divided by capacity ensures that inserting N elements is still O(N) // in the average case. Using the constant 16 means that we expect to rehash ~8 // times more often than when generations are disabled. We are adding expected // rehash_probability * #insertions/capacity_growth = 16/capacity * ((7/8 - // 7/16) * capacity)/capacity_growth = ~7 extra rehashes per capacity growth. inline size_t RehashProbabilityConstant() { return 16; } class CommonFieldsGenerationInfoEnabled { // A sentinel value for reserved_growth_ indicating that we just ran out of // reserved growth on the last insertion. When reserve is called and then // insertions take place, reserved_growth_'s state machine is N, ..., 1, // kReservedGrowthJustRanOut, 0. static constexpr size_t kReservedGrowthJustRanOut = (std::numeric_limits::max)(); public: CommonFieldsGenerationInfoEnabled() = default; CommonFieldsGenerationInfoEnabled(CommonFieldsGenerationInfoEnabled&& that) : reserved_growth_(that.reserved_growth_), reservation_size_(that.reservation_size_), generation_(that.generation_) { that.reserved_growth_ = 0; that.reservation_size_ = 0; that.generation_ = EmptyGeneration(); } CommonFieldsGenerationInfoEnabled& operator=( CommonFieldsGenerationInfoEnabled&&) = default; // Whether we should rehash on insert in order to detect bugs of using invalid // references. We rehash on the first insertion after reserved_growth_ reaches // 0 after a call to reserve. We also do a rehash with low probability // whenever reserved_growth_ is zero. bool should_rehash_for_bug_detection_on_insert(PerTableSeed seed, size_t capacity) const; // Similar to above, except that we don't depend on reserved_growth_. 
bool should_rehash_for_bug_detection_on_move(PerTableSeed seed, size_t capacity) const; void maybe_increment_generation_on_insert() { if (reserved_growth_ == kReservedGrowthJustRanOut) reserved_growth_ = 0; if (reserved_growth_ > 0) { if (--reserved_growth_ == 0) reserved_growth_ = kReservedGrowthJustRanOut; } else { increment_generation(); } } void increment_generation() { *generation_ = NextGeneration(*generation_); } void reset_reserved_growth(size_t reservation, size_t size) { reserved_growth_ = reservation - size; } size_t reserved_growth() const { return reserved_growth_; } void set_reserved_growth(size_t r) { reserved_growth_ = r; } size_t reservation_size() const { return reservation_size_; } void set_reservation_size(size_t r) { reservation_size_ = r; } GenerationType generation() const { return *generation_; } void set_generation(GenerationType g) { *generation_ = g; } GenerationType* generation_ptr() const { return generation_; } void set_generation_ptr(GenerationType* g) { generation_ = g; } private: // The number of insertions remaining that are guaranteed to not rehash due to // a prior call to reserve. Note: we store reserved growth in addition to // reservation size because calls to erase() decrease size_ but don't decrease // reserved growth. size_t reserved_growth_ = 0; // The maximum argument to reserve() since the container was cleared. We need // to keep track of this, in addition to reserved growth, because we reset // reserved growth to this when erase(begin(), end()) is called. size_t reservation_size_ = 0; // Pointer to the generation counter, which is used to validate iterators and // is stored in the backing array between the control bytes and the slots. // Note that we can't store the generation inside the container itself and // keep a pointer to the container in the iterators because iterators must // remain valid when the container is moved. // Note: we could derive this pointer from the control pointer, but it makes // the code more complicated, and there's a benefit in having the sizes of // raw_hash_set in sanitizer mode and non-sanitizer mode a bit more different, // which is that tests are less likely to rely on the size remaining the same. 
GenerationType* generation_ = EmptyGeneration(); }; class CommonFieldsGenerationInfoDisabled { public: CommonFieldsGenerationInfoDisabled() = default; CommonFieldsGenerationInfoDisabled(CommonFieldsGenerationInfoDisabled&&) = default; CommonFieldsGenerationInfoDisabled& operator=( CommonFieldsGenerationInfoDisabled&&) = default; bool should_rehash_for_bug_detection_on_insert(PerTableSeed, size_t) const { return false; } bool should_rehash_for_bug_detection_on_move(PerTableSeed, size_t) const { return false; } void maybe_increment_generation_on_insert() {} void increment_generation() {} void reset_reserved_growth(size_t, size_t) {} size_t reserved_growth() const { return 0; } void set_reserved_growth(size_t) {} size_t reservation_size() const { return 0; } void set_reservation_size(size_t) {} GenerationType generation() const { return 0; } void set_generation(GenerationType) {} GenerationType* generation_ptr() const { return nullptr; } void set_generation_ptr(GenerationType*) {} }; class HashSetIteratorGenerationInfoEnabled { public: HashSetIteratorGenerationInfoEnabled() = default; explicit HashSetIteratorGenerationInfoEnabled( const GenerationType* generation_ptr) : generation_ptr_(generation_ptr), generation_(*generation_ptr) {} GenerationType generation() const { return generation_; } void reset_generation() { generation_ = *generation_ptr_; } const GenerationType* generation_ptr() const { return generation_ptr_; } void set_generation_ptr(const GenerationType* ptr) { generation_ptr_ = ptr; } private: const GenerationType* generation_ptr_ = EmptyGeneration(); GenerationType generation_ = *generation_ptr_; }; class HashSetIteratorGenerationInfoDisabled { public: HashSetIteratorGenerationInfoDisabled() = default; explicit HashSetIteratorGenerationInfoDisabled(const GenerationType*) {} GenerationType generation() const { return 0; } void reset_generation() {} const GenerationType* generation_ptr() const { return nullptr; } void set_generation_ptr(const GenerationType*) {} }; #ifdef ABSL_SWISSTABLE_ENABLE_GENERATIONS using CommonFieldsGenerationInfo = CommonFieldsGenerationInfoEnabled; using HashSetIteratorGenerationInfo = HashSetIteratorGenerationInfoEnabled; #else using CommonFieldsGenerationInfo = CommonFieldsGenerationInfoDisabled; using HashSetIteratorGenerationInfo = HashSetIteratorGenerationInfoDisabled; #endif // Stored the information regarding number of slots we can still fill // without needing to rehash. // // We want to ensure sufficient number of empty slots in the table in order // to keep probe sequences relatively short. Empty slot in the probe group // is required to stop probing. // // Tombstones (kDeleted slots) are not included in the growth capacity, // because we'd like to rehash when the table is filled with tombstones and/or // full slots. // // GrowthInfo also stores a bit that encodes whether table may have any // deleted slots. // Most of the tables (>95%) have no deleted slots, so some functions can // be more efficient with this information. // // Callers can also force a rehash via the standard `rehash(0)`, // which will recompute this value as a side-effect. // // See also `CapacityToGrowth()`. class GrowthInfo { public: // Leaves data member uninitialized. GrowthInfo() = default; // Initializes the GrowthInfo assuming we can grow `growth_left` elements // and there are no kDeleted slots in the table. void InitGrowthLeftNoDeleted(size_t growth_left) { growth_left_info_ = growth_left; } // Overwrites single full slot with an empty slot. 
void OverwriteFullAsEmpty() { ++growth_left_info_; } // Overwrites single empty slot with a full slot. void OverwriteEmptyAsFull() { ABSL_SWISSTABLE_ASSERT(GetGrowthLeft() > 0); --growth_left_info_; } // Overwrites several empty slots with full slots. void OverwriteManyEmptyAsFull(size_t count) { ABSL_SWISSTABLE_ASSERT(GetGrowthLeft() >= count); growth_left_info_ -= count; } // Overwrites specified control element with full slot. void OverwriteControlAsFull(ctrl_t ctrl) { ABSL_SWISSTABLE_ASSERT(GetGrowthLeft() >= static_cast(IsEmpty(ctrl))); growth_left_info_ -= static_cast(IsEmpty(ctrl)); } // Overwrites single full slot with a deleted slot. void OverwriteFullAsDeleted() { growth_left_info_ |= kDeletedBit; } // Returns true if table satisfies two properties: // 1. Guaranteed to have no kDeleted slots. // 2. There is a place for at least one element to grow. bool HasNoDeletedAndGrowthLeft() const { return static_cast>(growth_left_info_) > 0; } // Returns true if the table satisfies two properties: // 1. Guaranteed to have no kDeleted slots. // 2. There is no growth left. bool HasNoGrowthLeftAndNoDeleted() const { return growth_left_info_ == 0; } // Returns true if GetGrowthLeft() == 0, but must be called only if // HasNoDeleted() is false. It is slightly more efficient. bool HasNoGrowthLeftAssumingMayHaveDeleted() const { ABSL_SWISSTABLE_ASSERT(!HasNoDeleted()); return growth_left_info_ == kDeletedBit; } // Returns true if table guaranteed to have no kDeleted slots. bool HasNoDeleted() const { return static_cast>(growth_left_info_) >= 0; } // Returns the number of elements left to grow. size_t GetGrowthLeft() const { return growth_left_info_ & kGrowthLeftMask; } private: static constexpr size_t kGrowthLeftMask = ((~size_t{}) >> 1); static constexpr size_t kDeletedBit = ~kGrowthLeftMask; // Topmost bit signal whenever there are deleted slots. size_t growth_left_info_; }; static_assert(sizeof(GrowthInfo) == sizeof(size_t), ""); static_assert(alignof(GrowthInfo) == alignof(size_t), ""); // Returns whether `n` is a valid capacity (i.e., number of slots). // // A valid capacity is a non-zero integer `2^m - 1`. constexpr bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } // Returns the number of "cloned control bytes". // // This is the number of control bytes that are present both at the beginning // of the control byte array and at the end, such that we can create a // `Group::kWidth`-width probe window starting from any control byte. constexpr size_t NumClonedBytes() { return Group::kWidth - 1; } // Returns the number of control bytes including cloned. constexpr size_t NumControlBytes(size_t capacity) { return capacity + 1 + NumClonedBytes(); } // Computes the offset from the start of the backing allocation of control. // infoz and growth_info are stored at the beginning of the backing array. constexpr size_t ControlOffset(bool has_infoz) { return (has_infoz ? sizeof(HashtablezInfoHandle) : 0) + sizeof(GrowthInfo); } // Helper class for computing offsets and allocation size of hash set fields. 
class RawHashSetLayout { public: explicit RawHashSetLayout(size_t capacity, size_t slot_size, size_t slot_align, bool has_infoz) : control_offset_(ControlOffset(has_infoz)), generation_offset_(control_offset_ + NumControlBytes(capacity)), slot_offset_( (generation_offset_ + NumGenerationBytes() + slot_align - 1) & (~slot_align + 1)), alloc_size_(slot_offset_ + capacity * slot_size) { ABSL_SWISSTABLE_ASSERT(IsValidCapacity(capacity)); ABSL_SWISSTABLE_ASSERT( slot_size <= ((std::numeric_limits::max)() - slot_offset_) / capacity); } // Returns precomputed offset from the start of the backing allocation of // control. size_t control_offset() const { return control_offset_; } // Given the capacity of a table, computes the offset (from the start of the // backing allocation) of the generation counter (if it exists). size_t generation_offset() const { return generation_offset_; } // Given the capacity of a table, computes the offset (from the start of the // backing allocation) at which the slots begin. size_t slot_offset() const { return slot_offset_; } // Given the capacity of a table, computes the total size of the backing // array. size_t alloc_size() const { return alloc_size_; } private: size_t control_offset_; size_t generation_offset_; size_t slot_offset_; size_t alloc_size_; }; struct HashtableFreeFunctionsAccess; // Suppress erroneous uninitialized memory errors on GCC. For example, GCC // thinks that the call to slot_array() in find_or_prepare_insert() is reading // uninitialized memory, but slot_array is only called there when the table is // non-empty and this memory is initialized when the table is non-empty. #if !defined(__clang__) && defined(__GNUC__) #define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) \ _Pragma("GCC diagnostic push") \ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") \ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") x; \ _Pragma("GCC diagnostic pop") #define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) \ ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(return x) #else #define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(x) x #define ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(x) return x #endif // This allows us to work around an uninitialized memory warning when // constructing begin() iterators in empty hashtables. union MaybeInitializedPtr { void* get() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(p); } void set(void* ptr) { p = ptr; } void* p; }; struct HeapPtrs { explicit HeapPtrs(uninitialized_tag_t) {} explicit HeapPtrs(ctrl_t* c) : control(c) {} // The control bytes (and, also, a pointer near to the base of the backing // array). // // This contains `capacity + 1 + NumClonedBytes()` entries, even // when the table is empty (hence EmptyGroup). // // Note that growth_info is stored immediately before this pointer. // May be uninitialized for SOO tables. ctrl_t* control; // The beginning of the slots, located at `SlotOffset()` bytes after // `control`. May be uninitialized for empty tables. // Note: we can't use `slots` because Qt defines "slots" as a macro. MaybeInitializedPtr slot_array; }; // Returns the maximum size of the SOO slot. constexpr size_t MaxSooSlotSize() { return sizeof(HeapPtrs); } // Manages the backing array pointers or the SOO slot. When raw_hash_set::is_soo // is true, the SOO slot is stored in `soo_data`. Otherwise, we use `heap`. 
union HeapOrSoo { explicit HeapOrSoo(uninitialized_tag_t) : heap(uninitialized_tag_t{}) {} explicit HeapOrSoo(ctrl_t* c) : heap(c) {} ctrl_t*& control() { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control); } ctrl_t* control() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.control); } MaybeInitializedPtr& slot_array() { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array); } MaybeInitializedPtr slot_array() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(heap.slot_array); } void* get_soo_data() { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(soo_data); } const void* get_soo_data() const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(soo_data); } HeapPtrs heap; unsigned char soo_data[MaxSooSlotSize()]; }; // CommonFields hold the fields in raw_hash_set that do not depend // on template parameters. This allows us to conveniently pass all // of this state to helper functions as a single argument. class CommonFields : public CommonFieldsGenerationInfo { public: explicit CommonFields(soo_tag_t) : capacity_(SooCapacity()), size_(no_seed_empty_tag_t{}), heap_or_soo_(uninitialized_tag_t{}) {} explicit CommonFields(full_soo_tag_t) : capacity_(SooCapacity()), size_(full_soo_tag_t{}), heap_or_soo_(uninitialized_tag_t{}) {} explicit CommonFields(non_soo_tag_t) : capacity_(0), size_(no_seed_empty_tag_t{}), heap_or_soo_(EmptyGroup()) {} // For use in swapping. explicit CommonFields(uninitialized_tag_t) : size_(uninitialized_tag_t{}), heap_or_soo_(uninitialized_tag_t{}) {} // Not copyable CommonFields(const CommonFields&) = delete; CommonFields& operator=(const CommonFields&) = delete; // Copy with guarantee that it is not SOO. CommonFields(non_soo_tag_t, const CommonFields& that) : capacity_(that.capacity_), size_(that.size_), heap_or_soo_(that.heap_or_soo_) { } // Movable CommonFields(CommonFields&& that) = default; CommonFields& operator=(CommonFields&&) = default; template static CommonFields CreateDefault() { return kSooEnabled ? CommonFields{soo_tag_t{}} : CommonFields{non_soo_tag_t{}}; } // The inline data for SOO is written on top of control_/slots_. const void* soo_data() const { return heap_or_soo_.get_soo_data(); } void* soo_data() { return heap_or_soo_.get_soo_data(); } ctrl_t* control() const { return heap_or_soo_.control(); } // When we set the control bytes, we also often want to generate a new seed. // So we bundle these two operations together to make sure we don't forget to // generate a new seed. // The table will be invalidated if // `kGenerateSeed && !empty() && !is_single_group(capacity())` because H1 is // being changed. In such cases, we will need to rehash the table. template void set_control(ctrl_t* c) { heap_or_soo_.control() = c; if constexpr (kGenerateSeed) { generate_new_seed(); } } void* backing_array_start() const { // growth_info (and maybe infoz) is stored before control bytes. ABSL_SWISSTABLE_ASSERT( reinterpret_cast(control()) % alignof(size_t) == 0); return control() - ControlOffset(has_infoz()); } // Note: we can't use slots() because Qt defines "slots" as a macro. void* slot_array() const { return heap_or_soo_.slot_array().get(); } MaybeInitializedPtr slots_union() const { return heap_or_soo_.slot_array(); } void set_slots(void* s) { heap_or_soo_.slot_array().set(s); } // The number of filled slots. size_t size() const { return size_.size(); } // Sets the size to zero, but keeps hashinfoz bit and seed. 
void set_size_to_zero() { size_.set_size_to_zero_keep_metadata(); } void set_empty_soo() { AssertInSooMode(); size_ = HashtableSize(no_seed_empty_tag_t{}); } void set_full_soo() { AssertInSooMode(); size_ = HashtableSize(full_soo_tag_t{}); } void increment_size() { ABSL_SWISSTABLE_ASSERT(size() < capacity()); size_.increment_size(); } void increment_size(size_t n) { ABSL_SWISSTABLE_ASSERT(size() + n <= capacity()); size_.increment_size(n); } void decrement_size() { ABSL_SWISSTABLE_ASSERT(!empty()); size_.decrement_size(); } bool empty() const { return size_.empty(); } // The seed used for the H1 part of the hash function. PerTableSeed seed() const { return size_.seed(); } // Generates a new seed for the H1 part of the hash function. // The table will be invalidated if // `kGenerateSeed && !empty() && !is_single_group(capacity())` because H1 is // being changed. In such cases, we will need to rehash the table. void generate_new_seed() { size_.generate_new_seed(); } // The total number of available slots. size_t capacity() const { return capacity_; } void set_capacity(size_t c) { // We allow setting above the max valid capacity for debugging purposes. ABSL_SWISSTABLE_ASSERT(c == 0 || IsValidCapacity(c) || c > kAboveMaxValidCapacity); capacity_ = c; } // The number of slots we can still fill without needing to rehash. // This is stored in the heap allocation before the control bytes. // TODO(b/289225379): experiment with moving growth_info back inline to // increase room for SOO. size_t growth_left() const { return growth_info().GetGrowthLeft(); } GrowthInfo& growth_info() { auto* gl_ptr = reinterpret_cast(control()) - 1; ABSL_SWISSTABLE_ASSERT( reinterpret_cast(gl_ptr) % alignof(GrowthInfo) == 0); return *gl_ptr; } GrowthInfo growth_info() const { return const_cast(this)->growth_info(); } bool has_infoz() const { return size_.has_infoz(); } void set_has_infoz() { size_.set_has_infoz(); } HashtablezInfoHandle infoz() { return has_infoz() ? *reinterpret_cast(backing_array_start()) : HashtablezInfoHandle(); } void set_infoz(HashtablezInfoHandle infoz) { ABSL_SWISSTABLE_ASSERT(has_infoz()); *reinterpret_cast(backing_array_start()) = infoz; } bool should_rehash_for_bug_detection_on_insert() const { if constexpr (!SwisstableGenerationsEnabled()) { return false; } // As an optimization, we avoid calling ShouldRehashForBugDetection if we // will end up rehashing anyways. if (growth_left() == 0) return false; return CommonFieldsGenerationInfo:: should_rehash_for_bug_detection_on_insert(seed(), capacity()); } bool should_rehash_for_bug_detection_on_move() const { return CommonFieldsGenerationInfo::should_rehash_for_bug_detection_on_move( seed(), capacity()); } void reset_reserved_growth(size_t reservation) { CommonFieldsGenerationInfo::reset_reserved_growth(reservation, size()); } // The size of the backing array allocation. size_t alloc_size(size_t slot_size, size_t slot_align) const { return RawHashSetLayout(capacity(), slot_size, slot_align, has_infoz()) .alloc_size(); } // Move fields other than heap_or_soo_. void move_non_heap_or_soo_fields(CommonFields& that) { static_cast(*this) = std::move(static_cast(that)); capacity_ = that.capacity_; size_ = that.size_; } // Returns the number of control bytes set to kDeleted. For testing only. size_t TombstonesCount() const { return static_cast( std::count(control(), control() + capacity(), ctrl_t::kDeleted)); } // Helper to enable sanitizer mode validation to protect against reentrant // calls during element constructor/destructor. 
template void RunWithReentrancyGuard(F f) { #ifdef NDEBUG f(); return; #endif const size_t cap = capacity(); set_capacity(InvalidCapacity::kReentrance); f(); set_capacity(cap); } private: // We store the has_infoz bit in the lowest bit of size_. static constexpr size_t HasInfozShift() { return 1; } static constexpr size_t HasInfozMask() { return (size_t{1} << HasInfozShift()) - 1; } // We can't assert that SOO is enabled because we don't have SooEnabled(), but // we assert what we can. void AssertInSooMode() const { ABSL_SWISSTABLE_ASSERT(capacity() == SooCapacity()); ABSL_SWISSTABLE_ASSERT(!has_infoz()); } // The number of slots in the backing array. This is always 2^N-1 for an // integer N. NOTE: we tried experimenting with compressing the capacity and // storing it together with size_: (a) using 6 bits to store the corresponding // power (N in 2^N-1), and (b) storing 2^N as the most significant bit of // size_ and storing size in the low bits. Both of these experiments were // regressions, presumably because we need capacity to do find operations. size_t capacity_; // TODO(b/289225379): we could put size_ into HeapOrSoo and make capacity_ // encode the size in SOO case. We would be making size()/capacity() more // expensive in order to have more SOO space. HashtableSize size_; // Either the control/slots pointers or the SOO slot. HeapOrSoo heap_or_soo_; }; template class raw_hash_set; // Returns the next valid capacity after `n`. constexpr size_t NextCapacity(size_t n) { ABSL_SWISSTABLE_ASSERT(IsValidCapacity(n) || n == 0); return n * 2 + 1; } // Applies the following mapping to every byte in the control array: // * kDeleted -> kEmpty // * kEmpty -> kEmpty // * _ -> kDeleted // PRECONDITION: // IsValidCapacity(capacity) // ctrl[capacity] == ctrl_t::kSentinel // ctrl[i] != ctrl_t::kSentinel for all i < capacity void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity); // Converts `n` into the next valid capacity, per `IsValidCapacity`. constexpr size_t NormalizeCapacity(size_t n) { return n ? ~size_t{} >> countl_zero(n) : 1; } // General notes on capacity/growth methods below: // - We use 7/8th as maximum load factor. For 16-wide groups, that gives an // average of two empty slots per group. // - For (capacity+1) >= Group::kWidth, growth is 7/8*capacity. // - For (capacity+1) < Group::kWidth, growth == capacity. In this case, we // never need to probe (the whole table fits in one group) so we don't need a // load factor less than 1. // Given `capacity`, applies the load factor; i.e., it returns the maximum // number of values we should put into the table before a resizing rehash. constexpr size_t CapacityToGrowth(size_t capacity) { ABSL_SWISSTABLE_ASSERT(IsValidCapacity(capacity)); // `capacity*7/8` if (Group::kWidth == 8 && capacity == 7) { // x-x/8 does not work when x==7. return 6; } return capacity - capacity / 8; } // Given `growth`, "unapplies" the load factor to find how large the capacity // should be to stay within the load factor. // // This might not be a valid capacity and `NormalizeCapacity()` should be // called on this. constexpr size_t GrowthToLowerboundCapacity(size_t growth) { // `growth*8/7` if (Group::kWidth == 8 && growth == 7) { // x+(x-1)/7 does not work when x==7. 
return 8; } return growth + static_cast((static_cast(growth) - 1) / 7); } template size_t SelectBucketCountForIterRange(InputIter first, InputIter last, size_t bucket_count) { if (bucket_count != 0) { return bucket_count; } if (base_internal::IsAtLeastIterator()) { return GrowthToLowerboundCapacity( static_cast(std::distance(first, last))); } return 0; } constexpr bool SwisstableDebugEnabled() { #if defined(ABSL_SWISSTABLE_ENABLE_GENERATIONS) || \ ABSL_OPTION_HARDENED == 1 || !defined(NDEBUG) return true; #else return false; #endif } inline void AssertIsFull(const ctrl_t* ctrl, GenerationType generation, const GenerationType* generation_ptr, const char* operation) { if (!SwisstableDebugEnabled()) return; // `SwisstableDebugEnabled()` is also true for release builds with hardening // enabled. To minimize their impact in those builds: // - use `ABSL_PREDICT_FALSE()` to provide a compiler hint for code layout // - use `ABSL_RAW_LOG()` with a format string to reduce code size and improve // the chances that the hot paths will be inlined. if (ABSL_PREDICT_FALSE(ctrl == nullptr)) { ABSL_RAW_LOG(FATAL, "%s called on end() iterator.", operation); } if (ABSL_PREDICT_FALSE(ctrl == EmptyGroup())) { ABSL_RAW_LOG(FATAL, "%s called on default-constructed iterator.", operation); } if (SwisstableGenerationsEnabled()) { if (ABSL_PREDICT_FALSE(generation != *generation_ptr)) { ABSL_RAW_LOG(FATAL, "%s called on invalid iterator. The table could have " "rehashed or moved since this iterator was initialized.", operation); } if (ABSL_PREDICT_FALSE(!IsFull(*ctrl))) { ABSL_RAW_LOG( FATAL, "%s called on invalid iterator. The element was likely erased.", operation); } } else { if (ABSL_PREDICT_FALSE(!IsFull(*ctrl))) { ABSL_RAW_LOG( FATAL, "%s called on invalid iterator. The element might have been erased " "or the table might have rehashed. Consider running with " "--config=asan to diagnose rehashing issues.", operation); } } } // Note that for comparisons, null/end iterators are valid. inline void AssertIsValidForComparison(const ctrl_t* ctrl, GenerationType generation, const GenerationType* generation_ptr) { if (!SwisstableDebugEnabled()) return; const bool ctrl_is_valid_for_comparison = ctrl == nullptr || ctrl == EmptyGroup() || IsFull(*ctrl); if (SwisstableGenerationsEnabled()) { if (ABSL_PREDICT_FALSE(generation != *generation_ptr)) { ABSL_RAW_LOG(FATAL, "Invalid iterator comparison. The table could have rehashed " "or moved since this iterator was initialized."); } if (ABSL_PREDICT_FALSE(!ctrl_is_valid_for_comparison)) { ABSL_RAW_LOG( FATAL, "Invalid iterator comparison. The element was likely erased."); } } else { ABSL_HARDENING_ASSERT( ctrl_is_valid_for_comparison && "Invalid iterator comparison. The element might have been erased or " "the table might have rehashed. Consider running with --config=asan to " "diagnose rehashing issues."); } } // If the two iterators come from the same container, then their pointers will // interleave such that ctrl_a <= ctrl_b < slot_a <= slot_b or vice/versa. // Note: we take slots by reference so that it's not UB if they're uninitialized // as long as we don't read them (when ctrl is null). inline bool AreItersFromSameContainer(const ctrl_t* ctrl_a, const ctrl_t* ctrl_b, const void* const& slot_a, const void* const& slot_b) { // If either control byte is null, then we can't tell. 
if (ctrl_a == nullptr || ctrl_b == nullptr) return true; const bool a_is_soo = IsSooControl(ctrl_a); if (a_is_soo != IsSooControl(ctrl_b)) return false; if (a_is_soo) return slot_a == slot_b; const void* low_slot = slot_a; const void* hi_slot = slot_b; if (ctrl_a > ctrl_b) { std::swap(ctrl_a, ctrl_b); std::swap(low_slot, hi_slot); } return ctrl_b < low_slot && low_slot <= hi_slot; } // Asserts that two iterators come from the same container. // Note: we take slots by reference so that it's not UB if they're uninitialized // as long as we don't read them (when ctrl is null). inline void AssertSameContainer(const ctrl_t* ctrl_a, const ctrl_t* ctrl_b, const void* const& slot_a, const void* const& slot_b, const GenerationType* generation_ptr_a, const GenerationType* generation_ptr_b) { if (!SwisstableDebugEnabled()) return; // `SwisstableDebugEnabled()` is also true for release builds with hardening // enabled. To minimize their impact in those builds: // - use `ABSL_PREDICT_FALSE()` to provide a compiler hint for code layout // - use `ABSL_RAW_LOG()` with a format string to reduce code size and improve // the chances that the hot paths will be inlined. // fail_if(is_invalid, message) crashes when is_invalid is true and provides // an error message based on `message`. const auto fail_if = [](bool is_invalid, const char* message) { if (ABSL_PREDICT_FALSE(is_invalid)) { ABSL_RAW_LOG(FATAL, "Invalid iterator comparison. %s", message); } }; const bool a_is_default = ctrl_a == EmptyGroup(); const bool b_is_default = ctrl_b == EmptyGroup(); if (a_is_default && b_is_default) return; fail_if(a_is_default != b_is_default, "Comparing default-constructed hashtable iterator with a " "non-default-constructed hashtable iterator."); if (SwisstableGenerationsEnabled()) { if (ABSL_PREDICT_TRUE(generation_ptr_a == generation_ptr_b)) return; // Users don't need to know whether the tables are SOO so don't mention SOO // in the debug message. const bool a_is_soo = IsSooControl(ctrl_a); const bool b_is_soo = IsSooControl(ctrl_b); fail_if(a_is_soo != b_is_soo || (a_is_soo && b_is_soo), "Comparing iterators from different hashtables."); const bool a_is_empty = IsEmptyGeneration(generation_ptr_a); const bool b_is_empty = IsEmptyGeneration(generation_ptr_b); fail_if(a_is_empty != b_is_empty, "Comparing an iterator from an empty hashtable with an iterator " "from a non-empty hashtable."); fail_if(a_is_empty && b_is_empty, "Comparing iterators from different empty hashtables."); const bool a_is_end = ctrl_a == nullptr; const bool b_is_end = ctrl_b == nullptr; fail_if(a_is_end || b_is_end, "Comparing iterator with an end() iterator from a different " "hashtable."); fail_if(true, "Comparing non-end() iterators from different hashtables."); } else { ABSL_HARDENING_ASSERT_SLOW( AreItersFromSameContainer(ctrl_a, ctrl_b, slot_a, slot_b) && "Invalid iterator comparison. The iterators may be from different " "containers or the container might have rehashed or moved. Consider " "running with --config=asan to diagnose issues."); } } struct FindInfo { size_t offset; size_t probe_length; }; // Whether a table is "small". A small table fits entirely into a probing // group, i.e., has a capacity < `Group::kWidth`. // // In small mode we are able to use the whole capacity. The extra control // bytes give us at least one "empty" control byte to stop the iteration. // This is important to make 1 a valid capacity. // // In small mode only the first `capacity` control bytes after the sentinel // are valid. 
The rest contain dummy ctrl_t::kEmpty values that do not // represent a real slot. This is important to take into account on // `find_first_non_full()`, where we never try // `ShouldInsertBackwards()` for small tables. constexpr bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; } // Whether a table fits entirely into a probing group. // Arbitrary order of elements in such tables is correct. constexpr bool is_single_group(size_t capacity) { return capacity <= Group::kWidth; } // Begins a probing operation on `common.control`, using `hash`. inline probe_seq probe(PerTableSeed seed, const size_t capacity, size_t hash) { return probe_seq(H1(hash, seed), capacity); } inline probe_seq probe(const CommonFields& common, size_t hash) { return probe(common.seed(), common.capacity(), hash); } // Probes an array of control bits using a probe sequence derived from `hash`, // and returns the offset corresponding to the first deleted or empty slot. // // Behavior when the entire table is full is undefined. // // NOTE: this function must work with tables having both empty and deleted // slots in the same group. Such tables appear during `erase()`. template inline FindInfo find_first_non_full(const CommonFields& common, size_t hash) { auto seq = probe(common, hash); const ctrl_t* ctrl = common.control(); const PerTableSeed seed = common.seed(); if (IsEmptyOrDeleted(ctrl[seq.offset()]) && !ShouldInsertBackwards(common.capacity(), hash, seed)) { return {seq.offset(), /*probe_length=*/0}; } while (true) { GroupFullEmptyOrDeleted g{ctrl + seq.offset()}; auto mask = g.MaskEmptyOrDeleted(); if (mask) { return { seq.offset(GetInsertionOffset(mask, common.capacity(), hash, seed)), seq.index()}; } seq.next(); ABSL_SWISSTABLE_ASSERT(seq.index() <= common.capacity() && "full table!"); } } // Extern template for inline function keep possibility of inlining. // When compiler decided to not inline, no symbols will be added to the // corresponding translation unit. extern template FindInfo find_first_non_full(const CommonFields&, size_t); // Non-inlined version of find_first_non_full for use in less // performance critical routines. FindInfo find_first_non_full_outofline(const CommonFields&, size_t); // Sets sanitizer poisoning for slot corresponding to control byte being set. inline void DoSanitizeOnSetCtrl(const CommonFields& c, size_t i, ctrl_t h, size_t slot_size) { ABSL_SWISSTABLE_ASSERT(i < c.capacity()); auto* slot_i = static_cast(c.slot_array()) + i * slot_size; if (IsFull(h)) { SanitizerUnpoisonMemoryRegion(slot_i, slot_size); } else { SanitizerPoisonMemoryRegion(slot_i, slot_size); } } // Sets `ctrl[i]` to `h`. // // Unlike setting it directly, this function will perform bounds checks and // mirror the value to the cloned tail if necessary. inline void SetCtrl(const CommonFields& c, size_t i, ctrl_t h, size_t slot_size) { DoSanitizeOnSetCtrl(c, i, h, slot_size); ctrl_t* ctrl = c.control(); ctrl[i] = h; ctrl[((i - NumClonedBytes()) & c.capacity()) + (NumClonedBytes() & c.capacity())] = h; } // Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`. inline void SetCtrl(const CommonFields& c, size_t i, h2_t h, size_t slot_size) { SetCtrl(c, i, static_cast(h), slot_size); } // Like SetCtrl, but in a single group table, we can save some operations when // setting the cloned control byte. 
inline void SetCtrlInSingleGroupTable(const CommonFields& c, size_t i, ctrl_t h, size_t slot_size) { ABSL_SWISSTABLE_ASSERT(is_single_group(c.capacity())); DoSanitizeOnSetCtrl(c, i, h, slot_size); ctrl_t* ctrl = c.control(); ctrl[i] = h; ctrl[i + c.capacity() + 1] = h; } // Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`. inline void SetCtrlInSingleGroupTable(const CommonFields& c, size_t i, h2_t h, size_t slot_size) { SetCtrlInSingleGroupTable(c, i, static_cast(h), slot_size); } // Like SetCtrl, but in a table with capacity >= Group::kWidth - 1, // we can save some operations when setting the cloned control byte. inline void SetCtrlInLargeTable(const CommonFields& c, size_t i, ctrl_t h, size_t slot_size) { ABSL_SWISSTABLE_ASSERT(c.capacity() >= Group::kWidth - 1); DoSanitizeOnSetCtrl(c, i, h, slot_size); ctrl_t* ctrl = c.control(); ctrl[i] = h; ctrl[((i - NumClonedBytes()) & c.capacity()) + NumClonedBytes()] = h; } // Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`. inline void SetCtrlInLargeTable(const CommonFields& c, size_t i, h2_t h, size_t slot_size) { SetCtrlInLargeTable(c, i, static_cast(h), slot_size); } // growth_info (which is a size_t) is stored with the backing array. constexpr size_t BackingArrayAlignment(size_t align_of_slot) { return (std::max)(align_of_slot, alignof(GrowthInfo)); } // Returns the address of the ith slot in slots where each slot occupies // slot_size. inline void* SlotAddress(void* slot_array, size_t slot, size_t slot_size) { return static_cast(static_cast(slot_array) + (slot * slot_size)); } // Iterates over all full slots and calls `cb(const ctrl_t*, void*)`. // No insertion to the table is allowed during `cb` call. // Erasure is allowed only for the element passed to the callback. // The table must not be in SOO mode. void IterateOverFullSlots(const CommonFields& c, size_t slot_size, absl::FunctionRef cb); template constexpr bool ShouldSampleHashtablezInfoForAlloc() { // Folks with custom allocators often make unwarranted assumptions about the // behavior of their classes vis-a-vis trivial destructability and what // calls they will or won't make. Avoid sampling for people with custom // allocators to get us out of this mess. This is not a hard guarantee but // a workaround while we plan the exact guarantee we want to provide. return std::is_same_v>; } template bool ShouldSampleHashtablezInfoOnResize(bool force_sampling, bool is_hashtablez_eligible, size_t old_capacity, CommonFields& c) { if (!is_hashtablez_eligible) return false; // Force sampling is only allowed for SOO tables. ABSL_SWISSTABLE_ASSERT(kSooEnabled || !force_sampling); if (kSooEnabled && force_sampling) { return true; } // In SOO, we sample on the first insertion so if this is an empty SOO case // (e.g. when reserve is called), then we still need to sample. if (kSooEnabled && old_capacity == SooCapacity() && c.empty()) { return ShouldSampleNextTable(); } if (!kSooEnabled && old_capacity == 0) { return ShouldSampleNextTable(); } return false; } // Allocates `n` bytes for a backing array. 
template ABSL_ATTRIBUTE_NOINLINE void* AllocateBackingArray(void* alloc, size_t n) { return Allocate(static_cast(alloc), n); } template ABSL_ATTRIBUTE_NOINLINE void DeallocateBackingArray( void* alloc, size_t capacity, ctrl_t* ctrl, size_t slot_size, size_t slot_align, bool had_infoz) { RawHashSetLayout layout(capacity, slot_size, slot_align, had_infoz); void* backing_array = ctrl - layout.control_offset(); // Unpoison before returning the memory to the allocator. SanitizerUnpoisonMemoryRegion(backing_array, layout.alloc_size()); Deallocate(static_cast(alloc), backing_array, layout.alloc_size()); } // PolicyFunctions bundles together some information for a particular // raw_hash_set instantiation. This information is passed to // type-erased functions that want to do small amounts of type-specific // work. struct PolicyFunctions { uint32_t key_size; uint32_t value_size; uint32_t slot_size; uint16_t slot_align; uint8_t soo_capacity; bool is_hashtablez_eligible; // Returns the pointer to the hash function stored in the set. void* (*hash_fn)(CommonFields& common); // Returns the hash of the pointed-to slot. size_t (*hash_slot)(const void* hash_fn, void* slot); // Transfers the contents of `count` slots from src_slot to dst_slot. // We use ability to transfer several slots in single group table growth. void (*transfer_n)(void* set, void* dst_slot, void* src_slot, size_t count); // Returns the pointer to the CharAlloc stored in the set. void* (*get_char_alloc)(CommonFields& common); // Allocates n bytes for the backing store for common. void* (*alloc)(void* alloc, size_t n); // Deallocates the backing store from common. void (*dealloc)(void* alloc, size_t capacity, ctrl_t* ctrl, size_t slot_size, size_t slot_align, bool had_infoz); // Iterates over full slots in old table, finds new positions // for them and transfers the slots. // Returns the total probe length. size_t (*find_new_positions_and_transfer_slots)(CommonFields& common, ctrl_t* old_ctrl, void* old_slots, size_t old_capacity); }; // Returns the maximum valid size for a table with 1-byte slots. // This function is an utility shared by MaxValidSize and IsAboveValidSize. // Template parameter is only used to enable testing. template constexpr size_t MaxValidSizeFor1ByteSlot() { if constexpr (kSizeOfSizeT == 8) { return CapacityToGrowth( static_cast(uint64_t{1} << HashtableSize::kSizeBitCount) - 1); } else { static_assert(kSizeOfSizeT == 4); return CapacityToGrowth((size_t{1} << (kSizeOfSizeT * 8 - 2)) - 1); } } // Returns the maximum valid size for a table with provided slot size. // Template parameter is only used to enable testing. template constexpr size_t MaxValidSize(size_t slot_size) { if constexpr (kSizeOfSizeT == 8) { // For small slot sizes we are limited by HashtableSize::kSizeBitCount. if (slot_size < size_t{1} << (64 - HashtableSize::kSizeBitCount)) { return MaxValidSizeFor1ByteSlot(); } return (size_t{1} << (kSizeOfSizeT * 8 - 2)) / slot_size; } else { return MaxValidSizeFor1ByteSlot() / slot_size; } } // Returns true if size is larger than the maximum valid size. // It is an optimization to avoid the division operation in the common case. // Template parameter is only used to enable testing. template constexpr bool IsAboveValidSize(size_t size, size_t slot_size) { if constexpr (kSizeOfSizeT == 8) { // For small slot sizes we are limited by HashtableSize::kSizeBitCount. 
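// (Illustrative, assuming HashtableSize::kSizeBitCount == 48: any slot_size
// below `1 << 16` takes the cheap comparison against the 1-byte-slot limit
// below, avoiding the division performed by MaxValidSize.)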
if (ABSL_PREDICT_TRUE(slot_size <
                          (size_t{1} << (64 - HashtableSize::kSizeBitCount)))) {
      return size > MaxValidSizeFor1ByteSlot();
    }
    return size > MaxValidSize(slot_size);
  } else {
    return uint64_t{size} * slot_size > MaxValidSizeFor1ByteSlot();
  }
}

// Returns the index of the SOO slot when growing from SOO to non-SOO in a
// single group. See also InitializeSmallControlBytesAfterSoo(). It's important
// to use index 1 so that when resizing from capacity 1 to 3, we can still have
// random iteration order between the first two inserted elements.
// I.e. it allows inserting the second element at either index 0 or 2.
constexpr size_t SooSlotIndex() { return 1; }

// Maximum capacity for the small-table-after-SOO algorithm.
// Note that the typical size after SOO is 3, but we allow up to 7.
// Allowing up to 16 would require an additional store that can be avoided.
constexpr size_t MaxSmallAfterSooCapacity() { return 7; }

// Resizes an empty, non-allocated table to a capacity that fits `new_size`
// elements.
// Requires:
//   1. `c.capacity() == policy.soo_capacity`.
//   2. `c.empty()`.
//   3. `new_size > policy.soo_capacity`.
// An attempt will be made to sample the table.
void ReserveEmptyNonAllocatedTableToFitNewSize(CommonFields& common,
                                               const PolicyFunctions& policy,
                                               size_t new_size);

// The same as ReserveEmptyNonAllocatedTableToFitNewSize, but resizes to the
// next valid capacity after `bucket_count`.
void ReserveEmptyNonAllocatedTableToFitBucketCount(
    CommonFields& common, const PolicyFunctions& policy, size_t bucket_count);

// Resizes an empty, non-allocated SOO table to NextCapacity(SooCapacity()) and
// forces the table to be sampled.
// SOO tables need to switch from SOO to heap in order to store the infoz.
// Requires:
//   1. `c.capacity() == SooCapacity()`.
//   2. `c.empty()`.
void GrowEmptySooTableToNextCapacityForceSampling(
    CommonFields& common, const PolicyFunctions& policy);

// Type-erased version of raw_hash_set::rehash.
void Rehash(CommonFields& common, const PolicyFunctions& policy, size_t n);

// Type-erased version of raw_hash_set::reserve for tables that have an
// allocated backing array.
//
// Requires:
//   1. `c.capacity() > policy.soo_capacity` OR `!c.empty()`.
// Reserving already allocated tables is considered to be a rare case.
void ReserveAllocatedTable(CommonFields& common, const PolicyFunctions& policy,
                           size_t n);

// Returns the optimal size for memcpy when transferring the SOO slot to its
// new position at SooSlotIndex().
// At the destination we are allowed to copy up to twice as many bytes,
// because there is at least one more slot after SooSlotIndex().
// The result must not exceed MaxSooSlotSize().
// Some of the cases are merged to minimize the number of function
// instantiations.
constexpr size_t OptimalMemcpySizeForSooSlotTransfer(
    size_t slot_size, size_t max_soo_slot_size = MaxSooSlotSize()) {
  static_assert(MaxSooSlotSize() >= 8, "unexpectedly small SOO slot size");
  if (slot_size == 1) {
    return 1;
  }
  if (slot_size <= 3) {
    return 4;
  }
  // We are merging 4 and 8 into one case because we expect them to be the
  // hottest cases. Copying 8 bytes is as fast as copying 4 on common
  // architectures.
  if (slot_size <= 8) {
    return 8;
  }
  if (max_soo_slot_size <= 16) {
    return max_soo_slot_size;
  }
  if (slot_size <= 16) {
    return 16;
  }
  if (max_soo_slot_size <= 24) {
    return max_soo_slot_size;
  }
  static_assert(MaxSooSlotSize() <= 24, "unexpectedly large SOO slot size");
  return 24;
}

// Resizes a full SOO table to the NextCapacity(SooCapacity()).
// All possible template combinations are defined in the cc file to improve
// compilation time.
template
void GrowFullSooTableToNextCapacity(CommonFields& common,
                                    const PolicyFunctions& policy,
                                    size_t soo_slot_hash);

// As `GrowFullSooTableToNextCapacity`, except that we also force the SOO
// table to be sampled. SOO tables need to switch from SOO to heap in order to
// store the infoz. No-op if sampling is disabled or not possible.
void GrowFullSooTableToNextCapacityForceSampling(CommonFields& common,
                                                 const PolicyFunctions& policy);

// Resizes a table with allocated slots and changes the table seed.
// Tables with SOO enabled must have capacity > policy.soo_capacity.
// No sampling will be performed since the table is already allocated.
void ResizeAllocatedTableWithSeedChange(CommonFields& common,
                                        const PolicyFunctions& policy,
                                        size_t new_capacity);

inline void PrepareInsertCommon(CommonFields& common) {
  common.increment_size();
  common.maybe_increment_generation_on_insert();
}

// Like prepare_insert, but for the case of inserting into a full SOO table.
size_t PrepareInsertAfterSoo(size_t hash, size_t slot_size,
                             CommonFields& common);

// ClearBackingArray clears the backing array, either modifying it in place,
// or creating a new one based on the value of "reuse".
// REQUIRES: c.capacity > 0
void ClearBackingArray(CommonFields& c, const PolicyFunctions& policy,
                       void* alloc, bool reuse, bool soo_enabled);

// Type-erased version of raw_hash_set::erase_meta_only.
void EraseMetaOnly(CommonFields& c, size_t index, size_t slot_size);

// For trivially relocatable types we use memcpy directly. This allows us to
// share the same function body for raw_hash_set instantiations that have the
// same slot size as long as they are relocatable.
// A separate function for relocating a single slot would cause significant
// binary bloat.
template
ABSL_ATTRIBUTE_NOINLINE void TransferNRelocatable(void*, void* dst, void* src,
                                                  size_t count) {
  // TODO(b/382423690): Experiment with making specializations for power of 2
  // and non power of 2. This would require passing the size of the slot.
  memcpy(dst, src, SizeOfSlot * count);
}

// Returns a pointer to `common`. This is used to implement type-erased
// raw_hash_set::get_hash_ref_fn and raw_hash_set::get_alloc_ref_fn for the
// empty class cases.
void* GetRefForEmptyClass(CommonFields& common);

// Given the hash of a value not currently in the table and the first empty
// slot in the probe sequence, finds a viable slot index to insert it at.
//
// In case there's no space left, the table can be resized or rehashed
// (for tables with deleted slots, see FindInsertPositionWithGrowthOrRehash).
//
// If there are no deleted slots and growth_left is positive, the element can
// be inserted at the provided `target` position.
//
// When the table has deleted slots (according to GrowthInfo), the target
// position will be searched one more time using `find_first_non_full`.
//
// REQUIRES: Table is not SOO.
// REQUIRES: At least one non-full slot available.
// REQUIRES: `target` is a valid empty position to insert.
size_t PrepareInsertNonSoo(CommonFields& common, const PolicyFunctions& policy,
                           size_t hash, FindInfo target);

// A SwissTable.
//
// Policy: a policy defines how to perform different operations on
// the slots of the hashtable (see hash_policy_traits.h for the full interface
// of policy).
//
// Hash: a (possibly polymorphic) functor that hashes keys of the hashtable.
// The functor should accept a key and return a size_t as the hash.
For best performance // it is important that the hash function provides high entropy across all bits // of the hash. // // Eq: a (possibly polymorphic) functor that compares two keys for equality. It // should accept two (of possibly different type) keys and return a bool: true // if they are equal, false if they are not. If two keys compare equal, then // their hash values as defined by Hash MUST be equal. // // Allocator: an Allocator // [https://en.cppreference.com/w/cpp/named_req/Allocator] with which // the storage of the hashtable will be allocated and the elements will be // constructed and destroyed. template class raw_hash_set { using PolicyTraits = hash_policy_traits; using KeyArgImpl = KeyArg::value && IsTransparent::value>; public: using init_type = typename PolicyTraits::init_type; using key_type = typename PolicyTraits::key_type; using allocator_type = Alloc; using size_type = size_t; using difference_type = ptrdiff_t; using hasher = Hash; using key_equal = Eq; using policy_type = Policy; using value_type = typename PolicyTraits::value_type; using reference = value_type&; using const_reference = const value_type&; using pointer = typename absl::allocator_traits< allocator_type>::template rebind_traits::pointer; using const_pointer = typename absl::allocator_traits< allocator_type>::template rebind_traits::const_pointer; private: // Alias used for heterogeneous lookup functions. // `key_arg` evaluates to `K` when the functors are transparent and to // `key_type` otherwise. It permits template argument deduction on `K` for the // transparent case. template using key_arg = typename KeyArgImpl::template type; using slot_type = typename PolicyTraits::slot_type; // TODO(b/289225379): we could add extra SOO space inside raw_hash_set // after CommonFields to allow inlining larger slot_types (e.g. std::string), // but it's a bit complicated if we want to support incomplete mapped_type in // flat_hash_map. We could potentially do this for flat_hash_set and for an // allowlist of `mapped_type`s of flat_hash_map that includes e.g. arithmetic // types, strings, cords, and pairs/tuples of allowlisted types. constexpr static bool SooEnabled() { return PolicyTraits::soo_enabled() && sizeof(slot_type) <= sizeof(HeapOrSoo) && alignof(slot_type) <= alignof(HeapOrSoo); } constexpr static size_t DefaultCapacity() { return SooEnabled() ? SooCapacity() : 0; } // Whether `size` fits in the SOO capacity of this table. bool fits_in_soo(size_t size) const { return SooEnabled() && size <= SooCapacity(); } // Whether this table is in SOO mode or non-SOO mode. bool is_soo() const { return fits_in_soo(capacity()); } bool is_full_soo() const { return is_soo() && !empty(); } // Give an early error when key_type is not hashable/eq. auto KeyTypeCanBeHashed(const Hash& h, const key_type& k) -> decltype(h(k)); auto KeyTypeCanBeEq(const Eq& eq, const key_type& k) -> decltype(eq(k, k)); using AllocTraits = absl::allocator_traits; using SlotAlloc = typename absl::allocator_traits< allocator_type>::template rebind_alloc; // People are often sloppy with the exact type of their allocator (sometimes // it has an extra const or is missing the pair, but rebinds made it work // anyway). 
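// For example (illustrative only), both of the following have historically
// worked and behave identically, even though the second names a slightly
// "wrong" value_type in its allocator (missing `const` on the key):
//
//   absl::flat_hash_map<int, std::string, absl::Hash<int>, std::equal_to<int>,
//                       std::allocator<std::pair<const int, std::string>>> a;
//   absl::flat_hash_map<int, std::string, absl::Hash<int>, std::equal_to<int>,
//                       std::allocator<std::pair<int, std::string>>> b;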
using CharAlloc = typename absl::allocator_traits::template rebind_alloc; using SlotAllocTraits = typename absl::allocator_traits< allocator_type>::template rebind_traits; static_assert(std::is_lvalue_reference::value, "Policy::element() must return a reference"); // An enabler for insert(T&&): T must be convertible to init_type or be the // same as [cv] value_type [ref]. template using Insertable = absl::disjunction< std::is_same, absl::remove_cvref_t>, std::is_convertible>; template using IsNotBitField = std::is_pointer; // RequiresNotInit is a workaround for gcc prior to 7.1. // See https://godbolt.org/g/Y4xsUh. template using RequiresNotInit = typename std::enable_if::value, int>::type; template using IsDecomposable = IsDecomposable; template using IsDecomposableAndInsertable = IsDecomposable::value, T>>; // Evaluates to true if an assignment from the given type would require the // source object to remain alive for the life of the element. template using IsLifetimeBoundAssignmentFrom = std::conditional_t< policy_trait_element_is_owner::value, std::false_type, type_traits_internal::IsLifetimeBoundAssignment>; public: static_assert(std::is_same::value, "Allocators with custom pointer types are not supported"); static_assert(std::is_same::value, "Allocators with custom pointer types are not supported"); class iterator : private HashSetIteratorGenerationInfo { friend class raw_hash_set; friend struct HashtableFreeFunctionsAccess; public: using iterator_category = std::forward_iterator_tag; using value_type = typename raw_hash_set::value_type; using reference = absl::conditional_t; using pointer = absl::remove_reference_t*; using difference_type = typename raw_hash_set::difference_type; iterator() {} // PRECONDITION: not an end() iterator. reference operator*() const { AssertIsFull(ctrl_, generation(), generation_ptr(), "operator*()"); return unchecked_deref(); } // PRECONDITION: not an end() iterator. pointer operator->() const { AssertIsFull(ctrl_, generation(), generation_ptr(), "operator->"); return &operator*(); } // PRECONDITION: not an end() iterator. iterator& operator++() { AssertIsFull(ctrl_, generation(), generation_ptr(), "operator++"); ++ctrl_; ++slot_; skip_empty_or_deleted(); if (ABSL_PREDICT_FALSE(*ctrl_ == ctrl_t::kSentinel)) ctrl_ = nullptr; return *this; } // PRECONDITION: not an end() iterator. iterator operator++(int) { auto tmp = *this; ++*this; return tmp; } friend bool operator==(const iterator& a, const iterator& b) { AssertIsValidForComparison(a.ctrl_, a.generation(), a.generation_ptr()); AssertIsValidForComparison(b.ctrl_, b.generation(), b.generation_ptr()); AssertSameContainer(a.ctrl_, b.ctrl_, a.slot_, b.slot_, a.generation_ptr(), b.generation_ptr()); return a.ctrl_ == b.ctrl_; } friend bool operator!=(const iterator& a, const iterator& b) { return !(a == b); } private: iterator(ctrl_t* ctrl, slot_type* slot, const GenerationType* generation_ptr) : HashSetIteratorGenerationInfo(generation_ptr), ctrl_(ctrl), slot_(slot) { // This assumption helps the compiler know that any non-end iterator is // not equal to any end iterator. ABSL_ASSUME(ctrl != nullptr); } // This constructor is used in begin() to avoid an MSan // use-of-uninitialized-value error. Delegating from this constructor to // the previous one doesn't avoid the error. 
iterator(ctrl_t* ctrl, MaybeInitializedPtr slot, const GenerationType* generation_ptr) : HashSetIteratorGenerationInfo(generation_ptr), ctrl_(ctrl), slot_(to_slot(slot.get())) { // This assumption helps the compiler know that any non-end iterator is // not equal to any end iterator. ABSL_ASSUME(ctrl != nullptr); } // For end() iterators. explicit iterator(const GenerationType* generation_ptr) : HashSetIteratorGenerationInfo(generation_ptr), ctrl_(nullptr) {} // Fixes up `ctrl_` to point to a full or sentinel by advancing `ctrl_` and // `slot_` until they reach one. void skip_empty_or_deleted() { while (IsEmptyOrDeleted(*ctrl_)) { uint32_t shift = GroupFullEmptyOrDeleted{ctrl_}.CountLeadingEmptyOrDeleted(); ctrl_ += shift; slot_ += shift; } } ctrl_t* control() const { return ctrl_; } slot_type* slot() const { return slot_; } // We use EmptyGroup() for default-constructed iterators so that they can // be distinguished from end iterators, which have nullptr ctrl_. ctrl_t* ctrl_ = EmptyGroup(); // To avoid uninitialized member warnings, put slot_ in an anonymous union. // The member is not initialized on singleton and end iterators. union { slot_type* slot_; }; // An equality check which skips ABSL Hardening iterator invalidation // checks. // Should be used when the lifetimes of the iterators are well-enough // understood to prove that they cannot be invalid. bool unchecked_equals(const iterator& b) { return ctrl_ == b.control(); } // Dereferences the iterator without ABSL Hardening iterator invalidation // checks. reference unchecked_deref() const { return PolicyTraits::element(slot_); } }; class const_iterator { friend class raw_hash_set; template friend struct absl::container_internal::hashtable_debug_internal:: HashtableDebugAccess; public: using iterator_category = typename iterator::iterator_category; using value_type = typename raw_hash_set::value_type; using reference = typename raw_hash_set::const_reference; using pointer = typename raw_hash_set::const_pointer; using difference_type = typename raw_hash_set::difference_type; const_iterator() = default; // Implicit construction from iterator. const_iterator(iterator i) : inner_(std::move(i)) {} // NOLINT reference operator*() const { return *inner_; } pointer operator->() const { return inner_.operator->(); } const_iterator& operator++() { ++inner_; return *this; } const_iterator operator++(int) { return inner_++; } friend bool operator==(const const_iterator& a, const const_iterator& b) { return a.inner_ == b.inner_; } friend bool operator!=(const const_iterator& a, const const_iterator& b) { return !(a == b); } private: const_iterator(const ctrl_t* ctrl, const slot_type* slot, const GenerationType* gen) : inner_(const_cast(ctrl), const_cast(slot), gen) { } ctrl_t* control() const { return inner_.control(); } slot_type* slot() const { return inner_.slot(); } iterator inner_; bool unchecked_equals(const const_iterator& b) { return inner_.unchecked_equals(b.inner_); } }; using node_type = node_handle, Alloc>; using insert_return_type = InsertReturnType; // Note: can't use `= default` due to non-default noexcept (causes // problems for some compilers). 
NOLINTNEXTLINE raw_hash_set() noexcept( std::is_nothrow_default_constructible::value && std::is_nothrow_default_constructible::value && std::is_nothrow_default_constructible::value) {} ABSL_ATTRIBUTE_NOINLINE explicit raw_hash_set( size_t bucket_count, const hasher& hash = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) : settings_(CommonFields::CreateDefault(), hash, eq, alloc) { if (bucket_count > DefaultCapacity()) { ReserveEmptyNonAllocatedTableToFitBucketCount( common(), GetPolicyFunctions(), bucket_count); } } raw_hash_set(size_t bucket_count, const hasher& hash, const allocator_type& alloc) : raw_hash_set(bucket_count, hash, key_equal(), alloc) {} raw_hash_set(size_t bucket_count, const allocator_type& alloc) : raw_hash_set(bucket_count, hasher(), key_equal(), alloc) {} explicit raw_hash_set(const allocator_type& alloc) : raw_hash_set(0, hasher(), key_equal(), alloc) {} template raw_hash_set(InputIter first, InputIter last, size_t bucket_count = 0, const hasher& hash = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) : raw_hash_set(SelectBucketCountForIterRange(first, last, bucket_count), hash, eq, alloc) { insert(first, last); } template raw_hash_set(InputIter first, InputIter last, size_t bucket_count, const hasher& hash, const allocator_type& alloc) : raw_hash_set(first, last, bucket_count, hash, key_equal(), alloc) {} template raw_hash_set(InputIter first, InputIter last, size_t bucket_count, const allocator_type& alloc) : raw_hash_set(first, last, bucket_count, hasher(), key_equal(), alloc) {} template raw_hash_set(InputIter first, InputIter last, const allocator_type& alloc) : raw_hash_set(first, last, 0, hasher(), key_equal(), alloc) {} // Instead of accepting std::initializer_list as the first // argument like std::unordered_set does, we have two overloads // that accept std::initializer_list and std::initializer_list. // This is advantageous for performance. // // // Turns {"abc", "def"} into std::initializer_list, then // // copies the strings into the set. // std::unordered_set s = {"abc", "def"}; // // // Turns {"abc", "def"} into std::initializer_list, then // // copies the strings into the set. // absl::flat_hash_set s = {"abc", "def"}; // // The same trick is used in insert(). // // The enabler is necessary to prevent this constructor from triggering where // the copy constructor is meant to be called. // // absl::flat_hash_set a, b{a}; // // RequiresNotInit is a workaround for gcc prior to 7.1. 
template = 0, std::enable_if_t::value, int> = 0> raw_hash_set(std::initializer_list init, size_t bucket_count = 0, const hasher& hash = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {} raw_hash_set(std::initializer_list init, size_t bucket_count = 0, const hasher& hash = hasher(), const key_equal& eq = key_equal(), const allocator_type& alloc = allocator_type()) : raw_hash_set(init.begin(), init.end(), bucket_count, hash, eq, alloc) {} template = 0, std::enable_if_t::value, int> = 0> raw_hash_set(std::initializer_list init, size_t bucket_count, const hasher& hash, const allocator_type& alloc) : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {} raw_hash_set(std::initializer_list init, size_t bucket_count, const hasher& hash, const allocator_type& alloc) : raw_hash_set(init, bucket_count, hash, key_equal(), alloc) {} template = 0, std::enable_if_t::value, int> = 0> raw_hash_set(std::initializer_list init, size_t bucket_count, const allocator_type& alloc) : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} raw_hash_set(std::initializer_list init, size_t bucket_count, const allocator_type& alloc) : raw_hash_set(init, bucket_count, hasher(), key_equal(), alloc) {} template = 0, std::enable_if_t::value, int> = 0> raw_hash_set(std::initializer_list init, const allocator_type& alloc) : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {} raw_hash_set(std::initializer_list init, const allocator_type& alloc) : raw_hash_set(init, 0, hasher(), key_equal(), alloc) {} raw_hash_set(const raw_hash_set& that) : raw_hash_set(that, AllocTraits::select_on_container_copy_construction( allocator_type(that.char_alloc_ref()))) {} raw_hash_set(const raw_hash_set& that, const allocator_type& a) : raw_hash_set(GrowthToLowerboundCapacity(that.size()), that.hash_ref(), that.eq_ref(), a) { that.AssertNotDebugCapacity(); const size_t size = that.size(); if (size == 0) { return; } // We don't use `that.is_soo()` here because `that` can have non-SOO // capacity but have a size that fits into SOO capacity. if (fits_in_soo(size)) { ABSL_SWISSTABLE_ASSERT(size == 1); common().set_full_soo(); emplace_at(soo_iterator(), *that.begin()); if (should_sample_soo()) { GrowFullSooTableToNextCapacityForceSampling(common(), GetPolicyFunctions()); } return; } ABSL_SWISSTABLE_ASSERT(!that.is_soo()); const size_t cap = capacity(); // Note about single group tables: // 1. It is correct to have any order of elements. // 2. Order has to be non deterministic. // 3. We are assigning elements with arbitrary `shift` starting from // `capacity + shift` position. // 4. `shift` must be coprime with `capacity + 1` in order to be able to use // modular arithmetic to traverse all positions, instead if cycling // through a subset of positions. Odd numbers are coprime with any // `capacity + 1` (2^N). size_t offset = cap; const size_t shift = is_single_group(cap) ? (common().seed().seed() | 1) : 0; IterateOverFullSlots( that.common(), sizeof(slot_type), [&](const ctrl_t* that_ctrl, void* that_slot_void) { slot_type* that_slot = static_cast(that_slot_void); if (shift == 0) { // Big tables case. Position must be searched via probing. // The table is guaranteed to be empty, so we can do faster than // a full `insert`. 
const size_t hash = PolicyTraits::apply( HashElement{hash_ref()}, PolicyTraits::element(that_slot)); FindInfo target = find_first_non_full_outofline(common(), hash); infoz().RecordInsert(hash, target.probe_length); offset = target.offset; } else { // Small tables case. Next position is computed via shift. offset = (offset + shift) & cap; } const h2_t h2 = static_cast(*that_ctrl); ABSL_SWISSTABLE_ASSERT( // We rely that hash is not changed for small // tables. H2(PolicyTraits::apply(HashElement{hash_ref()}, PolicyTraits::element(that_slot))) == h2 && "hash function value changed unexpectedly during the copy"); SetCtrl(common(), offset, h2, sizeof(slot_type)); emplace_at(iterator_at(offset), PolicyTraits::element(that_slot)); common().maybe_increment_generation_on_insert(); }); if (shift != 0) { // On small table copy we do not record individual inserts. // RecordInsert requires hash, but it is unknown for small tables. infoz().RecordStorageChanged(size, cap); } common().increment_size(size); growth_info().OverwriteManyEmptyAsFull(size); } ABSL_ATTRIBUTE_NOINLINE raw_hash_set(raw_hash_set&& that) noexcept( std::is_nothrow_copy_constructible::value && std::is_nothrow_copy_constructible::value && std::is_nothrow_copy_constructible::value) : // Hash, equality and allocator are copied instead of moved because // `that` must be left valid. If Hash is std::function, moving it // would create a nullptr functor that cannot be called. // Note: we avoid using exchange for better generated code. settings_(PolicyTraits::transfer_uses_memcpy() || !that.is_full_soo() ? std::move(that.common()) : CommonFields{full_soo_tag_t{}}, that.hash_ref(), that.eq_ref(), that.char_alloc_ref()) { if (!PolicyTraits::transfer_uses_memcpy() && that.is_full_soo()) { transfer(soo_slot(), that.soo_slot()); } that.common() = CommonFields::CreateDefault(); annotate_for_bug_detection_on_move(that); } raw_hash_set(raw_hash_set&& that, const allocator_type& a) : settings_(CommonFields::CreateDefault(), that.hash_ref(), that.eq_ref(), a) { if (CharAlloc(a) == that.char_alloc_ref()) { swap_common(that); annotate_for_bug_detection_on_move(that); } else { move_elements_allocs_unequal(std::move(that)); } } raw_hash_set& operator=(const raw_hash_set& that) { that.AssertNotDebugCapacity(); if (ABSL_PREDICT_FALSE(this == &that)) return *this; constexpr bool propagate_alloc = AllocTraits::propagate_on_container_copy_assignment::value; // TODO(ezb): maybe avoid allocating a new backing array if this->capacity() // is an exact match for that.size(). If this->capacity() is too big, then // it would make iteration very slow to reuse the allocation. Maybe we can // do the same heuristic as clear() and reuse if it's small enough. allocator_type alloc(propagate_alloc ? that.char_alloc_ref() : char_alloc_ref()); raw_hash_set tmp(that, alloc); // NOLINTNEXTLINE: not returning *this for performance. return assign_impl(std::move(tmp)); } raw_hash_set& operator=(raw_hash_set&& that) noexcept( absl::allocator_traits::is_always_equal::value && std::is_nothrow_move_assignable::value && std::is_nothrow_move_assignable::value) { // TODO(sbenza): We should only use the operations from the noexcept clause // to make sure we actually adhere to that contract. // NOLINTNEXTLINE: not returning *this for performance. 
return move_assign( std::move(that), typename AllocTraits::propagate_on_container_move_assignment()); } ~raw_hash_set() { destructor_impl(); if constexpr (SwisstableAssertAccessToDestroyedTable()) { common().set_capacity(InvalidCapacity::kDestroyed); } } iterator begin() ABSL_ATTRIBUTE_LIFETIME_BOUND { if (ABSL_PREDICT_FALSE(empty())) return end(); if (is_soo()) return soo_iterator(); iterator it = {control(), common().slots_union(), common().generation_ptr()}; it.skip_empty_or_deleted(); ABSL_SWISSTABLE_ASSERT(IsFull(*it.control())); return it; } iterator end() ABSL_ATTRIBUTE_LIFETIME_BOUND { AssertNotDebugCapacity(); return iterator(common().generation_ptr()); } const_iterator begin() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return const_cast(this)->begin(); } const_iterator end() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return const_cast(this)->end(); } const_iterator cbegin() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return begin(); } const_iterator cend() const ABSL_ATTRIBUTE_LIFETIME_BOUND { return end(); } bool empty() const { return !size(); } size_t size() const { AssertNotDebugCapacity(); return common().size(); } size_t capacity() const { const size_t cap = common().capacity(); // Compiler complains when using functions in ASSUME so use local variable. ABSL_ATTRIBUTE_UNUSED static constexpr size_t kDefaultCapacity = DefaultCapacity(); ABSL_ASSUME(cap >= kDefaultCapacity); return cap; } size_t max_size() const { return MaxValidSize(sizeof(slot_type)); } ABSL_ATTRIBUTE_REINITIALIZES void clear() { if (SwisstableGenerationsEnabled() && capacity() >= InvalidCapacity::kMovedFrom) { common().set_capacity(DefaultCapacity()); } AssertNotDebugCapacity(); // Iterating over this container is O(bucket_count()). When bucket_count() // is much greater than size(), iteration becomes prohibitively expensive. // For clear() it is more important to reuse the allocated array when the // container is small because allocation takes comparatively long time // compared to destruction of the elements of the container. So we pick the // largest bucket_count() threshold for which iteration is still fast and // past that we simply deallocate the array. const size_t cap = capacity(); if (cap == 0) { // Already guaranteed to be empty; so nothing to do. } else if (is_soo()) { if (!empty()) destroy(soo_slot()); common().set_empty_soo(); } else { destroy_slots(); clear_backing_array(/*reuse=*/cap < 128); } common().set_reserved_growth(0); common().set_reservation_size(0); } // This overload kicks in when the argument is an rvalue of insertable and // decomposable type other than init_type. // // flat_hash_map m; // m.insert(std::make_pair("abc", 42)); template ::value && IsNotBitField::value && !IsLifetimeBoundAssignmentFrom::value, int>()> std::pair insert(T&& value) ABSL_ATTRIBUTE_LIFETIME_BOUND { return emplace(std::forward(value)); } template ::value && IsNotBitField::value && IsLifetimeBoundAssignmentFrom::value, int> = 0> std::pair insert( T&& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this)) ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->template insert(std::forward(value)); } // This overload kicks in when the argument is a bitfield or an lvalue of // insertable and decomposable type. 
// // union { int n : 1; }; // flat_hash_set s; // s.insert(n); // // flat_hash_set s; // const char* p = "hello"; // s.insert(p); // template ::value && !IsLifetimeBoundAssignmentFrom::value, int>()> std::pair insert(const T& value) ABSL_ATTRIBUTE_LIFETIME_BOUND { return emplace(value); } template ::value && IsLifetimeBoundAssignmentFrom::value, int> = 0> std::pair insert( const T& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this)) ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->template insert(value); } // This overload kicks in when the argument is an rvalue of init_type. Its // purpose is to handle brace-init-list arguments. // // flat_hash_map s; // s.insert({"abc", 42}); std::pair insert(init_type&& value) ABSL_ATTRIBUTE_LIFETIME_BOUND #if __cplusplus >= 202002L requires(!IsLifetimeBoundAssignmentFrom::value) #endif { return emplace(std::move(value)); } #if __cplusplus >= 202002L std::pair insert( init_type&& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this)) ABSL_ATTRIBUTE_LIFETIME_BOUND requires(IsLifetimeBoundAssignmentFrom::value) { return emplace(std::move(value)); } #endif template ::value && IsNotBitField::value && !IsLifetimeBoundAssignmentFrom::value, int>()> iterator insert(const_iterator, T&& value) ABSL_ATTRIBUTE_LIFETIME_BOUND { return insert(std::forward(value)).first; } template ::value && IsNotBitField::value && IsLifetimeBoundAssignmentFrom::value, int> = 0> iterator insert(const_iterator hint, T&& value ABSL_INTERNAL_ATTRIBUTE_CAPTURED_BY(this)) ABSL_ATTRIBUTE_LIFETIME_BOUND { return this->template insert(hint, std::forward(value)); } template ::value, int> = 0> iterator insert(const_iterator, const T& value) ABSL_ATTRIBUTE_LIFETIME_BOUND { return insert(value).first; } iterator insert(const_iterator, init_type&& value) ABSL_ATTRIBUTE_LIFETIME_BOUND { return insert(std::move(value)).first; } template void insert(InputIt first, InputIt last) { for (; first != last; ++first) emplace(*first); } template = 0, std::enable_if_t::value, int> = 0> void insert(std::initializer_list ilist) { insert(ilist.begin(), ilist.end()); } void insert(std::initializer_list ilist) { insert(ilist.begin(), ilist.end()); } insert_return_type insert(node_type&& node) ABSL_ATTRIBUTE_LIFETIME_BOUND { if (!node) return {end(), false, node_type()}; const auto& elem = PolicyTraits::element(CommonAccess::GetSlot(node)); auto res = PolicyTraits::apply( InsertSlot{*this, std::move(*CommonAccess::GetSlot(node))}, elem); if (res.second) { CommonAccess::Reset(&node); return {res.first, true, node_type()}; } else { return {res.first, false, std::move(node)}; } } iterator insert(const_iterator, node_type&& node) ABSL_ATTRIBUTE_LIFETIME_BOUND { auto res = insert(std::move(node)); node = std::move(res.node); return res.position; } // This overload kicks in if we can deduce the key from args. This enables us // to avoid constructing value_type if an entry with the same key already // exists. // // For example: // // flat_hash_map m = {{"abc", "def"}}; // // Creates no std::string copies and makes no heap allocations. // m.emplace("abc", "xyz"); template ::value, int> = 0> std::pair emplace(Args&&... args) ABSL_ATTRIBUTE_LIFETIME_BOUND { return PolicyTraits::apply(EmplaceDecomposable{*this}, std::forward(args)...); } // This overload kicks in if we cannot deduce the key from args. It constructs // value_type unconditionally and then either moves it into the table or // destroys. template ::value, int> = 0> std::pair emplace(Args&&... 
args) ABSL_ATTRIBUTE_LIFETIME_BOUND { alignas(slot_type) unsigned char raw[sizeof(slot_type)]; slot_type* slot = to_slot(&raw); construct(slot, std::forward(args)...); const auto& elem = PolicyTraits::element(slot); return PolicyTraits::apply(InsertSlot{*this, std::move(*slot)}, elem); } template iterator emplace_hint(const_iterator, Args&&... args) ABSL_ATTRIBUTE_LIFETIME_BOUND { return emplace(std::forward(args)...).first; } // Extension API: support for lazy emplace. // // Looks up key in the table. If found, returns the iterator to the element. // Otherwise calls `f` with one argument of type `raw_hash_set::constructor`, // and returns an iterator to the new element. // // `f` must abide by several restrictions: // - it MUST call `raw_hash_set::constructor` with arguments as if a // `raw_hash_set::value_type` is constructed, // - it MUST NOT access the container before the call to // `raw_hash_set::constructor`, and // - it MUST NOT erase the lazily emplaced element. // Doing any of these is undefined behavior. // // For example: // // std::unordered_set s; // // Makes ArenaStr even if "abc" is in the map. // s.insert(ArenaString(&arena, "abc")); // // flat_hash_set s; // // Makes ArenaStr only if "abc" is not in the map. // s.lazy_emplace("abc", [&](const constructor& ctor) { // ctor(&arena, "abc"); // }); // // WARNING: This API is currently experimental. If there is a way to implement // the same thing with the rest of the API, prefer that. class constructor { friend class raw_hash_set; public: template void operator()(Args&&... args) const { ABSL_SWISSTABLE_ASSERT(*slot_); PolicyTraits::construct(alloc_, *slot_, std::forward(args)...); *slot_ = nullptr; } private: constructor(allocator_type* a, slot_type** slot) : alloc_(a), slot_(slot) {} allocator_type* alloc_; slot_type** slot_; }; template iterator lazy_emplace(const key_arg& key, F&& f) ABSL_ATTRIBUTE_LIFETIME_BOUND { auto res = find_or_prepare_insert(key); if (res.second) { slot_type* slot = res.first.slot(); allocator_type alloc(char_alloc_ref()); std::forward(f)(constructor(&alloc, &slot)); ABSL_SWISSTABLE_ASSERT(!slot); } return res.first; } // Extension API: support for heterogeneous keys. // // std::unordered_set s; // // Turns "abc" into std::string. // s.erase("abc"); // // flat_hash_set s; // // Uses "abc" directly without copying it into std::string. // s.erase("abc"); template size_type erase(const key_arg& key) { auto it = find(key); if (it == end()) return 0; erase(it); return 1; } // Erases the element pointed to by `it`. Unlike `std::unordered_set::erase`, // this method returns void to reduce algorithmic complexity to O(1). The // iterator is invalidated so any increment should be done before calling // erase (e.g. `erase(it++)`). void erase(const_iterator cit) { erase(cit.inner_); } // This overload is necessary because otherwise erase(const K&) would be // a better match if non-const iterator is passed as an argument. void erase(iterator it) { AssertNotDebugCapacity(); AssertIsFull(it.control(), it.generation(), it.generation_ptr(), "erase()"); destroy(it.slot()); if (is_soo()) { common().set_empty_soo(); } else { erase_meta_only(it); } } iterator erase(const_iterator first, const_iterator last) ABSL_ATTRIBUTE_LIFETIME_BOUND { AssertNotDebugCapacity(); // We check for empty first because clear_backing_array requires that // capacity() > 0 as a precondition. 
if (empty()) return end(); if (first == last) return last.inner_; if (is_soo()) { destroy(soo_slot()); common().set_empty_soo(); return end(); } if (first == begin() && last == end()) { // TODO(ezb): we access control bytes in destroy_slots so it could make // sense to combine destroy_slots and clear_backing_array to avoid cache // misses when the table is large. Note that we also do this in clear(). destroy_slots(); clear_backing_array(/*reuse=*/true); common().set_reserved_growth(common().reservation_size()); return end(); } while (first != last) { erase(first++); } return last.inner_; } // Moves elements from `src` into `this`. // If the element already exists in `this`, it is left unmodified in `src`. template void merge(raw_hash_set& src) { // NOLINT AssertNotDebugCapacity(); src.AssertNotDebugCapacity(); assert(this != &src); // Returns whether insertion took place. const auto insert_slot = [this](slot_type* src_slot) { return PolicyTraits::apply(InsertSlot{*this, std::move(*src_slot)}, PolicyTraits::element(src_slot)) .second; }; if (src.is_soo()) { if (src.empty()) return; if (insert_slot(src.soo_slot())) src.common().set_empty_soo(); return; } for (auto it = src.begin(), e = src.end(); it != e;) { auto next = std::next(it); if (insert_slot(it.slot())) src.erase_meta_only(it); it = next; } } template void merge(raw_hash_set&& src) { merge(src); } node_type extract(const_iterator position) { AssertNotDebugCapacity(); AssertIsFull(position.control(), position.inner_.generation(), position.inner_.generation_ptr(), "extract()"); allocator_type alloc(char_alloc_ref()); auto node = CommonAccess::Transfer(alloc, position.slot()); if (is_soo()) { common().set_empty_soo(); } else { erase_meta_only(position); } return node; } template ::value, int> = 0> node_type extract(const key_arg& key) { auto it = find(key); return it == end() ? node_type() : extract(const_iterator{it}); } void swap(raw_hash_set& that) noexcept( IsNoThrowSwappable() && IsNoThrowSwappable() && IsNoThrowSwappable( typename AllocTraits::propagate_on_container_swap{})) { AssertNotDebugCapacity(); that.AssertNotDebugCapacity(); using std::swap; swap_common(that); swap(hash_ref(), that.hash_ref()); swap(eq_ref(), that.eq_ref()); SwapAlloc(char_alloc_ref(), that.char_alloc_ref(), typename AllocTraits::propagate_on_container_swap{}); } void rehash(size_t n) { Rehash(common(), GetPolicyFunctions(), n); } void reserve(size_t n) { const size_t cap = capacity(); if (ABSL_PREDICT_TRUE(cap > DefaultCapacity() || // !SooEnabled() implies empty(), so we can skip the // check for optimization. (SooEnabled() && !empty()))) { ReserveAllocatedTable(common(), GetPolicyFunctions(), n); } else { if (ABSL_PREDICT_TRUE(n > DefaultCapacity())) { ReserveEmptyNonAllocatedTableToFitNewSize(common(), GetPolicyFunctions(), n); } } } // Extension API: support for heterogeneous keys. // // std::unordered_set s; // // Turns "abc" into std::string. // s.count("abc"); // // ch_set s; // // Uses "abc" directly without copying it into std::string. // s.count("abc"); template size_t count(const key_arg& key) const { return find(key) == end() ? 0 : 1; } // Issues CPU prefetch instructions for the memory needed to find or insert // a key. Like all lookup functions, this support heterogeneous keys. // // NOTE: This is a very low level operation and should not be used without // specific benchmarks indicating its importance. 
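// A hypothetical usage sketch (the container, `Payload`, `Use`, and `keys`
// are illustrative, not part of this file):
//
//   absl::flat_hash_map<int64_t, Payload> cache = ...;
//   for (int64_t key : keys) {
//     cache.prefetch(key);   // start pulling the relevant cache lines
//   }
//   for (int64_t key : keys) {
//     Use(cache.find(key));  // lookups now hit warmer cache lines
//   }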
template void prefetch(const key_arg& key) const { if (capacity() == DefaultCapacity()) return; (void)key; // Avoid probing if we won't be able to prefetch the addresses received. #ifdef ABSL_HAVE_PREFETCH prefetch_heap_block(); auto seq = probe(common(), hash_ref()(key)); PrefetchToLocalCache(control() + seq.offset()); PrefetchToLocalCache(slot_array() + seq.offset()); #endif // ABSL_HAVE_PREFETCH } template ABSL_DEPRECATE_AND_INLINE() iterator find(const key_arg& key, size_t) ABSL_ATTRIBUTE_LIFETIME_BOUND { return find(key); } // The API of find() has one extension: the type of the key argument doesn't // have to be key_type. This is so called heterogeneous key support. template iterator find(const key_arg& key) ABSL_ATTRIBUTE_LIFETIME_BOUND { AssertOnFind(key); if (is_soo()) return find_soo(key); prefetch_heap_block(); return find_non_soo(key, hash_ref()(key)); } template ABSL_DEPRECATE_AND_INLINE() const_iterator find(const key_arg& key, size_t) const ABSL_ATTRIBUTE_LIFETIME_BOUND { return find(key); } template const_iterator find(const key_arg& key) const ABSL_ATTRIBUTE_LIFETIME_BOUND { return const_cast(this)->find(key); } template bool contains(const key_arg& key) const { // Here neither the iterator returned by `find()` nor `end()` can be invalid // outside of potential thread-safety issues. // `find()`'s return value is constructed, used, and then destructed // all in this context. return !find(key).unchecked_equals(end()); } template std::pair equal_range(const key_arg& key) ABSL_ATTRIBUTE_LIFETIME_BOUND { auto it = find(key); if (it != end()) return {it, std::next(it)}; return {it, it}; } template std::pair equal_range( const key_arg& key) const ABSL_ATTRIBUTE_LIFETIME_BOUND { auto it = find(key); if (it != end()) return {it, std::next(it)}; return {it, it}; } size_t bucket_count() const { return capacity(); } float load_factor() const { return capacity() ? static_cast(size()) / capacity() : 0.0; } float max_load_factor() const { return 1.0f; } void max_load_factor(float) { // Does nothing. } hasher hash_function() const { return hash_ref(); } key_equal key_eq() const { return eq_ref(); } allocator_type get_allocator() const { return allocator_type(char_alloc_ref()); } friend bool operator==(const raw_hash_set& a, const raw_hash_set& b) { if (a.size() != b.size()) return false; const raw_hash_set* outer = &a; const raw_hash_set* inner = &b; if (outer->capacity() > inner->capacity()) std::swap(outer, inner); for (const value_type& elem : *outer) { auto it = PolicyTraits::apply(FindElement{*inner}, elem); if (it == inner->end()) return false; // Note: we used key_equal to check for key equality in FindElement, but // we may need to do an additional comparison using // value_type::operator==. E.g. the keys could be equal and the // mapped_types could be unequal in a map or even in a set, key_equal // could ignore some fields that aren't ignored by operator==. 
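// For instance (illustrative):
//   absl::flat_hash_map<std::string, int> x = {{"a", 1}}, y = {{"a", 2}};
//   // FindElement locates key "a" in both tables, but `x == y` must still
//   // be false because the mapped values differ.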
static constexpr bool kKeyEqIsValueEq = std::is_same::value && std::is_same>::value; if (!kKeyEqIsValueEq && !(*it == elem)) return false; } return true; } friend bool operator!=(const raw_hash_set& a, const raw_hash_set& b) { return !(a == b); } template friend typename std::enable_if::value, H>::type AbslHashValue(H h, const raw_hash_set& s) { return H::combine(H::combine_unordered(std::move(h), s.begin(), s.end()), s.size()); } friend void swap(raw_hash_set& a, raw_hash_set& b) noexcept(noexcept(a.swap(b))) { a.swap(b); } private: template friend struct absl::container_internal::hashtable_debug_internal:: HashtableDebugAccess; friend struct absl::container_internal::HashtableFreeFunctionsAccess; struct FindElement { template const_iterator operator()(const K& key, Args&&...) const { return s.find(key); } const raw_hash_set& s; }; struct HashElement { template size_t operator()(const K& key, Args&&...) const { return h(key); } const hasher& h; }; template struct EqualElement { template bool operator()(const K2& lhs, Args&&...) const { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(eq(lhs, rhs)); } const K1& rhs; const key_equal& eq; }; struct EmplaceDecomposable { template std::pair operator()(const K& key, Args&&... args) const { auto res = s.find_or_prepare_insert(key); if (res.second) { s.emplace_at(res.first, std::forward(args)...); } return res; } raw_hash_set& s; }; template struct InsertSlot { template std::pair operator()(const K& key, Args&&...) && { auto res = s.find_or_prepare_insert(key); if (res.second) { s.transfer(res.first.slot(), &slot); } else if (do_destroy) { s.destroy(&slot); } return res; } raw_hash_set& s; // Constructed slot. Either moved into place or destroyed. slot_type&& slot; }; template inline void construct(slot_type* slot, Args&&... args) { common().RunWithReentrancyGuard([&] { allocator_type alloc(char_alloc_ref()); PolicyTraits::construct(&alloc, slot, std::forward(args)...); }); } inline void destroy(slot_type* slot) { common().RunWithReentrancyGuard([&] { allocator_type alloc(char_alloc_ref()); PolicyTraits::destroy(&alloc, slot); }); } inline void transfer(slot_type* to, slot_type* from) { common().RunWithReentrancyGuard([&] { allocator_type alloc(char_alloc_ref()); PolicyTraits::transfer(&alloc, to, from); }); } // TODO(b/289225379): consider having a helper class that has the impls for // SOO functionality. template iterator find_soo(const key_arg& key) { ABSL_SWISSTABLE_ASSERT(is_soo()); return empty() || !PolicyTraits::apply(EqualElement{key, eq_ref()}, PolicyTraits::element(soo_slot())) ? end() : soo_iterator(); } template iterator find_non_soo(const key_arg& key, size_t hash) { ABSL_SWISSTABLE_ASSERT(!is_soo()); auto seq = probe(common(), hash); const h2_t h2 = H2(hash); const ctrl_t* ctrl = control(); while (true) { Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(PolicyTraits::apply( EqualElement{key, eq_ref()}, PolicyTraits::element(slot_array() + seq.offset(i))))) return iterator_at(seq.offset(i)); } if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end(); seq.next(); ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); } } // Returns true if the table needs to be sampled. // This should be called on insertion into an empty SOO table and in copy // construction when the size can fit in SOO capacity. 
bool should_sample_soo() const { ABSL_SWISSTABLE_ASSERT(is_soo()); if (!ShouldSampleHashtablezInfoForAlloc()) return false; return ABSL_PREDICT_FALSE(ShouldSampleNextTable()); } void clear_backing_array(bool reuse) { ABSL_SWISSTABLE_ASSERT(capacity() > DefaultCapacity()); ClearBackingArray(common(), GetPolicyFunctions(), &char_alloc_ref(), reuse, SooEnabled()); } void destroy_slots() { ABSL_SWISSTABLE_ASSERT(!is_soo()); if (PolicyTraits::template destroy_is_trivial()) return; auto destroy_slot = [&](const ctrl_t*, void* slot) { this->destroy(static_cast(slot)); }; if constexpr (SwisstableAssertAccessToDestroyedTable()) { CommonFields common_copy(non_soo_tag_t{}, this->common()); common().set_capacity(InvalidCapacity::kDestroyed); IterateOverFullSlots(common_copy, sizeof(slot_type), destroy_slot); common().set_capacity(common_copy.capacity()); } else { IterateOverFullSlots(common(), sizeof(slot_type), destroy_slot); } } void dealloc() { ABSL_SWISSTABLE_ASSERT(capacity() > DefaultCapacity()); // Unpoison before returning the memory to the allocator. SanitizerUnpoisonMemoryRegion(slot_array(), sizeof(slot_type) * capacity()); infoz().Unregister(); DeallocateBackingArray(&char_alloc_ref(), capacity(), control(), sizeof(slot_type), alignof(slot_type), common().has_infoz()); } void destructor_impl() { if (SwisstableGenerationsEnabled() && capacity() >= InvalidCapacity::kMovedFrom) { return; } if (capacity() == 0) return; if (is_soo()) { if (!empty()) { ABSL_SWISSTABLE_IGNORE_UNINITIALIZED(destroy(soo_slot())); } return; } destroy_slots(); dealloc(); } // Erases, but does not destroy, the value pointed to by `it`. // // This merely updates the pertinent control byte. This can be used in // conjunction with Policy::transfer to move the object to another place. void erase_meta_only(const_iterator it) { ABSL_SWISSTABLE_ASSERT(!is_soo()); EraseMetaOnly(common(), static_cast(it.control() - control()), sizeof(slot_type)); } size_t hash_of(slot_type* slot) const { return PolicyTraits::apply(HashElement{hash_ref()}, PolicyTraits::element(slot)); } void resize_full_soo_table_to_next_capacity() { ABSL_SWISSTABLE_ASSERT(SooEnabled()); ABSL_SWISSTABLE_ASSERT(capacity() == SooCapacity()); ABSL_SWISSTABLE_ASSERT(!empty()); if constexpr (SooEnabled()) { GrowFullSooTableToNextCapacity( common(), GetPolicyFunctions(), hash_of(soo_slot())); } } // Casting directly from e.g. char* to slot_type* can cause compilation errors // on objective-C. This function converts to void* first, avoiding the issue. static slot_type* to_slot(void* buf) { return static_cast(buf); } // Requires that lhs does not have a full SOO slot. static void move_common(bool rhs_is_full_soo, CharAlloc& rhs_alloc, CommonFields& lhs, CommonFields&& rhs) { if (PolicyTraits::transfer_uses_memcpy() || !rhs_is_full_soo) { lhs = std::move(rhs); } else { lhs.move_non_heap_or_soo_fields(rhs); rhs.RunWithReentrancyGuard([&] { lhs.RunWithReentrancyGuard([&] { PolicyTraits::transfer(&rhs_alloc, to_slot(lhs.soo_data()), to_slot(rhs.soo_data())); }); }); } } // Swaps common fields making sure to avoid memcpy'ing a full SOO slot if we // aren't allowed to do so. 
void swap_common(raw_hash_set& that) { using std::swap; if (PolicyTraits::transfer_uses_memcpy()) { swap(common(), that.common()); return; } CommonFields tmp = CommonFields(uninitialized_tag_t{}); const bool that_is_full_soo = that.is_full_soo(); move_common(that_is_full_soo, that.char_alloc_ref(), tmp, std::move(that.common())); move_common(is_full_soo(), char_alloc_ref(), that.common(), std::move(common())); move_common(that_is_full_soo, that.char_alloc_ref(), common(), std::move(tmp)); } void annotate_for_bug_detection_on_move( ABSL_ATTRIBUTE_UNUSED raw_hash_set& that) { // We only enable moved-from validation when generations are enabled (rather // than using NDEBUG) to avoid issues in which NDEBUG is enabled in some // translation units but not in others. if (SwisstableGenerationsEnabled()) { that.common().set_capacity(this == &that ? InvalidCapacity::kSelfMovedFrom : InvalidCapacity::kMovedFrom); } if (!SwisstableGenerationsEnabled() || capacity() == DefaultCapacity() || capacity() > kAboveMaxValidCapacity) { return; } common().increment_generation(); if (!empty() && common().should_rehash_for_bug_detection_on_move()) { ResizeAllocatedTableWithSeedChange(common(), GetPolicyFunctions(), capacity()); } } template raw_hash_set& assign_impl(raw_hash_set&& that) { // We don't bother checking for this/that aliasing. We just need to avoid // breaking the invariants in that case. destructor_impl(); move_common(that.is_full_soo(), that.char_alloc_ref(), common(), std::move(that.common())); hash_ref() = that.hash_ref(); eq_ref() = that.eq_ref(); CopyAlloc(char_alloc_ref(), that.char_alloc_ref(), std::integral_constant()); that.common() = CommonFields::CreateDefault(); annotate_for_bug_detection_on_move(that); return *this; } raw_hash_set& move_elements_allocs_unequal(raw_hash_set&& that) { const size_t size = that.size(); if (size == 0) return *this; reserve(size); for (iterator it = that.begin(); it != that.end(); ++it) { insert(std::move(PolicyTraits::element(it.slot()))); that.destroy(it.slot()); } if (!that.is_soo()) that.dealloc(); that.common() = CommonFields::CreateDefault(); annotate_for_bug_detection_on_move(that); return *this; } raw_hash_set& move_assign(raw_hash_set&& that, std::true_type /*propagate_alloc*/) { return assign_impl(std::move(that)); } raw_hash_set& move_assign(raw_hash_set&& that, std::false_type /*propagate_alloc*/) { if (char_alloc_ref() == that.char_alloc_ref()) { return assign_impl(std::move(that)); } // Aliasing can't happen here because allocs would compare equal above. assert(this != &that); destructor_impl(); // We can't take over that's memory so we need to move each element. // While moving elements, this should have that's hash/eq so copy hash/eq // before moving elements. 
hash_ref() = that.hash_ref(); eq_ref() = that.eq_ref(); return move_elements_allocs_unequal(std::move(that)); } template std::pair find_or_prepare_insert_soo(const K& key) { if (empty()) { if (should_sample_soo()) { GrowEmptySooTableToNextCapacityForceSampling(common(), GetPolicyFunctions()); } else { common().set_full_soo(); return {soo_iterator(), true}; } } else if (PolicyTraits::apply(EqualElement{key, eq_ref()}, PolicyTraits::element(soo_slot()))) { return {soo_iterator(), false}; } else { resize_full_soo_table_to_next_capacity(); } const size_t index = PrepareInsertAfterSoo(hash_ref()(key), sizeof(slot_type), common()); return {iterator_at(index), true}; } template std::pair find_or_prepare_insert_non_soo(const K& key) { ABSL_SWISSTABLE_ASSERT(!is_soo()); prefetch_heap_block(); const size_t hash = hash_ref()(key); auto seq = probe(common(), hash); const h2_t h2 = H2(hash); const ctrl_t* ctrl = control(); while (true) { Group g{ctrl + seq.offset()}; for (uint32_t i : g.Match(h2)) { if (ABSL_PREDICT_TRUE(PolicyTraits::apply( EqualElement{key, eq_ref()}, PolicyTraits::element(slot_array() + seq.offset(i))))) return {iterator_at(seq.offset(i)), false}; } auto mask_empty = g.MaskEmpty(); if (ABSL_PREDICT_TRUE(mask_empty)) { size_t target = seq.offset( GetInsertionOffset(mask_empty, capacity(), hash, common().seed())); return {iterator_at(PrepareInsertNonSoo(common(), GetPolicyFunctions(), hash, FindInfo{target, seq.index()})), true}; } seq.next(); ABSL_SWISSTABLE_ASSERT(seq.index() <= capacity() && "full table!"); } } protected: // Asserts for correctness that we run on find/find_or_prepare_insert. template void AssertOnFind(ABSL_ATTRIBUTE_UNUSED const K& key) { AssertHashEqConsistent(key); AssertNotDebugCapacity(); } // Asserts that the capacity is not a sentinel invalid value. void AssertNotDebugCapacity() const { #ifdef NDEBUG if (!SwisstableGenerationsEnabled()) { return; } #endif if (ABSL_PREDICT_TRUE(capacity() < InvalidCapacity::kAboveMaxValidCapacity)) { return; } assert(capacity() != InvalidCapacity::kReentrance && "Reentrant container access during element construction/destruction " "is not allowed."); if constexpr (SwisstableAssertAccessToDestroyedTable()) { if (capacity() == InvalidCapacity::kDestroyed) { ABSL_RAW_LOG(FATAL, "Use of destroyed hash table."); } } if (SwisstableGenerationsEnabled() && ABSL_PREDICT_FALSE(capacity() >= InvalidCapacity::kMovedFrom)) { if (capacity() == InvalidCapacity::kSelfMovedFrom) { // If this log triggers, then a hash table was move-assigned to itself // and then used again later without being reinitialized. ABSL_RAW_LOG(FATAL, "Use of self-move-assigned hash table."); } ABSL_RAW_LOG(FATAL, "Use of moved-from hash table."); } } // Asserts that hash and equal functors provided by the user are consistent, // meaning that `eq(k1, k2)` implies `hash(k1)==hash(k2)`. template void AssertHashEqConsistent(const K& key) { #ifdef NDEBUG return; #endif // If the hash/eq functors are known to be consistent, then skip validation. 
  // Asserts that hash and equal functors provided by the user are consistent,
  // meaning that `eq(k1, k2)` implies `hash(k1) == hash(k2)`.
  template <class K>
  void AssertHashEqConsistent(const K& key) {
#ifdef NDEBUG
    return;
#endif
    // If the hash/eq functors are known to be consistent, then skip validation.
    if (std::is_same<hasher, absl::container_internal::StringHash>::value &&
        std::is_same<key_equal, absl::container_internal::StringEq>::value) {
      return;
    }
    if (std::is_scalar<key_type>::value &&
        std::is_same<hasher, absl::Hash<key_type>>::value &&
        std::is_same<key_equal, std::equal_to<key_type>>::value) {
      return;
    }
    if (empty()) return;

    const size_t hash_of_arg = hash_ref()(key);
    const auto assert_consistent = [&](const ctrl_t*, void* slot) {
      const value_type& element =
          PolicyTraits::element(static_cast<slot_type*>(slot));
      const bool is_key_equal =
          PolicyTraits::apply(EqualElement<K>{key, eq_ref()}, element);
      if (!is_key_equal) return;

      const size_t hash_of_slot =
          PolicyTraits::apply(HashElement{hash_ref()}, element);
      ABSL_ATTRIBUTE_UNUSED const bool is_hash_equal =
          hash_of_arg == hash_of_slot;
      assert((!is_key_equal || is_hash_equal) &&
             "eq(k1, k2) must imply that hash(k1) == hash(k2). "
             "hash/eq functors are inconsistent.");
    };

    if (is_soo()) {
      assert_consistent(/*unused*/ nullptr, soo_slot());
      return;
    }
    // We only do validation for small tables so that it's constant time.
    if (capacity() > 16) return;
    IterateOverFullSlots(common(), sizeof(slot_type), assert_consistent);
  }

  // Attempts to find `key` in the table; if it isn't found, returns an
  // iterator to the slot where the value can be inserted, with the control
  // byte already set to `key`'s H2. Returns a bool indicating whether an
  // insertion can take place.
  template <class K>
  std::pair<iterator, bool> find_or_prepare_insert(const K& key) {
    AssertOnFind(key);
    if (is_soo()) return find_or_prepare_insert_soo(key);
    return find_or_prepare_insert_non_soo(key);
  }

  // Constructs the value in the space pointed to by the iterator. This only
  // works after an unsuccessful find_or_prepare_insert() and before any other
  // modifications happen in the raw_hash_set.
  //
  // PRECONDITION: iter was returned from find_or_prepare_insert(k), where k is
  // the key decomposed from `forward<Args>(args)...`, and the bool returned by
  // find_or_prepare_insert(k) was true.
  // POSTCONDITION: *m.iterator_at(i) == value_type(forward<Args>(args)...).
  template <class... Args>
  void emplace_at(iterator iter, Args&&... args) {
    construct(iter.slot(), std::forward<Args>(args)...);
    assert(PolicyTraits::apply(FindElement{*this}, *iter) == iter &&
           "constructed value does not match the lookup key");
  }

  iterator iterator_at(size_t i) ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return {control() + i, slot_array() + i, common().generation_ptr()};
  }
  const_iterator iterator_at(size_t i) const ABSL_ATTRIBUTE_LIFETIME_BOUND {
    return const_cast<raw_hash_set*>(this)->iterator_at(i);
  }

  reference unchecked_deref(iterator it) { return it.unchecked_deref(); }
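  // Illustrative sketch (simplified, hypothetical helper; the real derived
  // containers go through PolicyTraits to decompose keys): an emplace-style
  // operation is built from the two-step protocol above. For a set, the value
  // is its own key:
  //
  //   std::pair<iterator, bool> insert_value(value_type v) {
  //     auto res = find_or_prepare_insert(v);
  //     if (res.second) emplace_at(res.first, std::move(v));
  //     return res;  // res.second is false if an equal key already existed.
  //   }
  //
  // If the key is already present, no slot is consumed and the stored value
  // is left untouched.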
 private:
  friend struct RawHashSetTestOnlyAccess;

  // The number of slots we can still fill without needing to rehash.
  //
  // This is stored separately due to tombstones: we do not include tombstones
  // in the growth capacity, because we'd like to rehash when the table is
  // otherwise filled with tombstones: otherwise, probe sequences might get
  // unacceptably long without triggering a rehash. Callers can also force a
  // rehash via the standard `rehash(0)`, which will recompute this value as a
  // side-effect.
  //
  // See `CapacityToGrowth()`.
  size_t growth_left() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().growth_left();
  }

  GrowthInfo& growth_info() {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().growth_info();
  }
  GrowthInfo growth_info() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().growth_info();
  }

  // Prefetch the heap-allocated memory region to resolve potential TLB and
  // cache misses. This is intended to overlap with the computation of the
  // hash for a key.
  void prefetch_heap_block() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
#if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
    __builtin_prefetch(control(), 0, 1);
#endif
  }

  CommonFields& common() { return settings_.template get<0>(); }
  const CommonFields& common() const { return settings_.template get<0>(); }

  ctrl_t* control() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().control();
  }
  slot_type* slot_array() const {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return static_cast<slot_type*>(common().slot_array());
  }
  slot_type* soo_slot() {
    ABSL_SWISSTABLE_ASSERT(is_soo());
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(
        static_cast<slot_type*>(common().soo_data()));
  }
  const slot_type* soo_slot() const {
    ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN(
        const_cast<raw_hash_set*>(this)->soo_slot());
  }
  iterator soo_iterator() {
    return {SooControl(), soo_slot(), common().generation_ptr()};
  }
  const_iterator soo_iterator() const {
    return const_cast<raw_hash_set*>(this)->soo_iterator();
  }
  HashtablezInfoHandle infoz() {
    ABSL_SWISSTABLE_ASSERT(!is_soo());
    return common().infoz();
  }

  hasher& hash_ref() { return settings_.template get<1>(); }
  const hasher& hash_ref() const { return settings_.template get<1>(); }
  key_equal& eq_ref() { return settings_.template get<2>(); }
  const key_equal& eq_ref() const { return settings_.template get<2>(); }
  CharAlloc& char_alloc_ref() { return settings_.template get<3>(); }
  const CharAlloc& char_alloc_ref() const {
    return settings_.template get<3>();
  }

  static void* get_char_alloc_ref_fn(CommonFields& common) {
    auto* h = reinterpret_cast<raw_hash_set*>(&common);
    return &h->char_alloc_ref();
  }
  static void* get_hash_ref_fn(CommonFields& common) {
    auto* h = reinterpret_cast<raw_hash_set*>(&common);
    // TODO(b/397453582): Remove support for const hasher.
    return const_cast<std::remove_const_t<hasher>*>(&h->hash_ref());
  }
  static void transfer_n_slots_fn(void* set, void* dst, void* src,
                                  size_t count) {
    auto* src_slot = to_slot(src);
    auto* dst_slot = to_slot(dst);
    auto* h = static_cast<raw_hash_set*>(set);
    for (; count > 0; --count, ++src_slot, ++dst_slot) {
      h->transfer(dst_slot, src_slot);
    }
  }

  // TODO(b/382423690): Type erase by GetKey + Hash for memcpyable types.
  static size_t find_new_positions_and_transfer_slots_fn(CommonFields& common,
                                                         ctrl_t* old_ctrl,
                                                         void* old_slots,
                                                         size_t old_capacity) {
    auto* set = reinterpret_cast<raw_hash_set*>(&common);
    slot_type* new_slots = set->slot_array();
    slot_type* old_slots_ptr = to_slot(old_slots);

    const auto insert_slot = [&](slot_type* slot) {
      size_t hash = PolicyTraits::apply(HashElement{set->hash_ref()},
                                        PolicyTraits::element(slot));
      auto target = find_first_non_full(common, hash);
      SetCtrl(common, target.offset, H2(hash), sizeof(slot_type));
      set->transfer(new_slots + target.offset, slot);
      return target.probe_length;
    };
    size_t total_probe_length = 0;
    for (size_t i = 0; i < old_capacity; ++i) {
      if (IsFull(old_ctrl[i])) {
        total_probe_length += insert_slot(old_slots_ptr + i);
      }
    }
    return total_probe_length;
  }
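  // Illustrative sketch (simplified, hypothetical types): GetPolicyFunctions()
  // below bundles per-instantiation constants and function pointers so that
  // non-templated resize/rehash code in raw_hash_set.cc can operate on any
  // slot type without being templated itself. Conceptually:
  //
  //   struct ErasedPolicy {
  //     uint32_t slot_size;
  //     void (*transfer_n)(void* set, void* dst, void* src, size_t n);
  //   };
  //   void GrowTable(CommonFields& common, const ErasedPolicy& policy) {
  //     // ... allocate a larger backing array, then move the slots over:
  //     // policy.transfer_n(&common, new_slots, old_slots, n);
  //   }
  //
  // The real PolicyFunctions carries more entries (hashing, allocation,
  // deallocation, slot transfer), all filled in by GetPolicyFunctions().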
  static const PolicyFunctions& GetPolicyFunctions() {
    static_assert(sizeof(slot_type) <= (std::numeric_limits<uint32_t>::max)(),
                  "Slot size is too large. Use std::unique_ptr for value type "
                  "or use absl::node_hash_{map,set}.");
    static_assert(alignof(slot_type) <=
                  size_t{(std::numeric_limits<uint16_t>::max)()});
    static_assert(sizeof(key_type) <=
                  size_t{(std::numeric_limits<uint32_t>::max)()});
    static_assert(sizeof(value_type) <=
                  size_t{(std::numeric_limits<uint32_t>::max)()});
    static constexpr size_t kBackingArrayAlignment =
        BackingArrayAlignment(alignof(slot_type));
    static constexpr PolicyFunctions value = {
        static_cast<uint32_t>(sizeof(key_type)),
        static_cast<uint32_t>(sizeof(value_type)),
        static_cast<uint32_t>(sizeof(slot_type)),
        static_cast<uint16_t>(alignof(slot_type)),
        static_cast<uint8_t>(SooEnabled() ? SooCapacity() : 0),
        ShouldSampleHashtablezInfoForAlloc<CharAlloc>(),
        // TODO(b/328722020): try to type erase
        // for standard layout and alignof(Hash) <= alignof(CommonFields).
        std::is_empty_v<hasher> ? &GetRefForEmptyClass
                                : &raw_hash_set::get_hash_ref_fn,
        PolicyTraits::template get_hash_slot_fn<hasher>(),
        PolicyTraits::transfer_uses_memcpy()
            ? TransferNRelocatable<sizeof(slot_type)>
            : &raw_hash_set::transfer_n_slots_fn,
        std::is_empty_v<CharAlloc> ? &GetRefForEmptyClass
                                   : &raw_hash_set::get_char_alloc_ref_fn,
        &AllocateBackingArray<kBackingArrayAlignment, CharAlloc>,
        &DeallocateBackingArray<kBackingArrayAlignment, CharAlloc>,
        &raw_hash_set::find_new_positions_and_transfer_slots_fn};
    return value;
  }

  // Bundle together CommonFields plus other objects which might be empty.
  // CompressedTuple will ensure that sizeof is not affected by any of the
  // empty fields that occur after CommonFields.
  absl::container_internal::CompressedTuple<CommonFields, hasher, key_equal,
                                            CharAlloc>
      settings_{CommonFields::CreateDefault(), hasher{}, key_equal{},
                CharAlloc{}};
};

// Friend access for free functions in raw_hash_set.h.
struct HashtableFreeFunctionsAccess {
  template <class Predicate, typename Set>
  static typename Set::size_type EraseIf(Predicate& pred, Set* c) {
    if (c->empty()) {
      return 0;
    }
    if (c->is_soo()) {
      auto it = c->soo_iterator();
      if (!pred(*it)) {
        ABSL_SWISSTABLE_ASSERT(c->size() == 1 &&
                               "hash table was modified unexpectedly");
        return 0;
      }
      c->destroy(it.slot());
      c->common().set_empty_soo();
      return 1;
    }
    ABSL_ATTRIBUTE_UNUSED const size_t original_size_for_assert = c->size();
    size_t num_deleted = 0;
    using SlotType = typename Set::slot_type;
    IterateOverFullSlots(
        c->common(), sizeof(SlotType), [&](const ctrl_t* ctrl, void* slot_void) {
          auto* slot = static_cast<SlotType*>(slot_void);
          if (pred(Set::PolicyTraits::element(slot))) {
            c->destroy(slot);
            EraseMetaOnly(c->common(),
                          static_cast<size_t>(ctrl - c->control()),
                          sizeof(*slot));
            ++num_deleted;
          }
        });
    // NOTE: IterateOverFullSlots allows removal of the current element, so we
    // additionally verify the size here.
    ABSL_SWISSTABLE_ASSERT(original_size_for_assert - num_deleted ==
                               c->size() &&
                           "hash table was modified unexpectedly");
    return num_deleted;
  }

  template <class Callback, typename Set>
  static void ForEach(Callback& cb, Set* c) {
    if (c->empty()) {
      return;
    }
    if (c->is_soo()) {
      cb(*c->soo_iterator());
      return;
    }
    using SlotType = typename Set::slot_type;
    using ElementTypeWithConstness = decltype(*c->begin());
    IterateOverFullSlots(
        c->common(), sizeof(SlotType), [&cb](const ctrl_t*, void* slot) {
          ElementTypeWithConstness& element =
              Set::PolicyTraits::element(static_cast<SlotType*>(slot));
          cb(element);
        });
  }
};

// Erases all elements that satisfy the predicate `pred` from the container
// `c`.
template <typename P, typename H, typename E, typename A, typename Predicate>
typename raw_hash_set<P, H, E, A>::size_type EraseIf(
    Predicate& pred, raw_hash_set<P, H, E, A>* c) {
  return HashtableFreeFunctionsAccess::EraseIf(pred, c);
}
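// Illustrative usage sketch: the EraseIf above backs the `absl::erase_if`
// overloads declared for the swisstable containers, e.g.:
//
//   absl::flat_hash_map<int, std::string> m = {{1, "a"}, {2, "b"}, {3, "c"}};
//   // Erase all entries with odd keys; returns the number erased (2 here).
//   size_t erased =
//       absl::erase_if(m, [](const auto& kv) { return kv.first % 2 == 1; });
//
// As the asserts above indicate, the predicate must not otherwise modify the
// container; removal happens only by returning true for the given element.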
// Calls `cb` for all elements in the container `c`.
template <typename P, typename H, typename E, typename A, typename Callback>
void ForEach(Callback& cb, raw_hash_set<P, H, E, A>* c) {
  return HashtableFreeFunctionsAccess::ForEach(cb, c);
}
template <typename P, typename H, typename E, typename A, typename Callback>
void ForEach(Callback& cb, const raw_hash_set<P, H, E, A>* c) {
  return HashtableFreeFunctionsAccess::ForEach(cb, c);
}

namespace hashtable_debug_internal {
template <typename Set>
struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> {
  using Traits = typename Set::PolicyTraits;
  using Slot = typename Traits::slot_type;

  static size_t GetNumProbes(const Set& set,
                             const typename Set::key_type& key) {
    if (set.is_soo()) return 0;
    size_t num_probes = 0;
    const size_t hash = set.hash_ref()(key);
    auto seq = probe(set.common(), hash);
    const h2_t h2 = H2(hash);
    const ctrl_t* ctrl = set.control();
    while (true) {
      container_internal::Group g{ctrl + seq.offset()};
      for (uint32_t i : g.Match(h2)) {
        if (Traits::apply(
                typename Set::template EqualElement<typename Set::key_type>{
                    key, set.eq_ref()},
                Traits::element(set.slot_array() + seq.offset(i))))
          return num_probes;
        ++num_probes;
      }
      if (g.MaskEmpty()) return num_probes;
      seq.next();
      ++num_probes;
    }
  }

  static size_t AllocatedByteSize(const Set& c) {
    size_t capacity = c.capacity();
    if (capacity == 0) return 0;
    size_t m =
        c.is_soo() ? 0 : c.common().alloc_size(sizeof(Slot), alignof(Slot));

    size_t per_slot = Traits::space_used(static_cast<const Slot*>(nullptr));
    if (per_slot != ~size_t{}) {
      m += per_slot * c.size();
    } else {
      for (auto it = c.begin(); it != c.end(); ++it) {
        m += Traits::space_used(it.slot());
      }
    }
    return m;
  }
};

}  // namespace hashtable_debug_internal

// Extern template instantiations reduce binary size and linker input size.
// Function definitions are in raw_hash_set.cc.
extern template void GrowFullSooTableToNextCapacity<0, false>(
    CommonFields&, const PolicyFunctions&, size_t);
extern template void GrowFullSooTableToNextCapacity<1, true>(
    CommonFields&, const PolicyFunctions&, size_t);
extern template void GrowFullSooTableToNextCapacity<4, true>(
    CommonFields&, const PolicyFunctions&, size_t);
extern template void GrowFullSooTableToNextCapacity<8, true>(
    CommonFields&, const PolicyFunctions&, size_t);
#if UINTPTR_MAX == UINT64_MAX
extern template void GrowFullSooTableToNextCapacity<16, true>(
    CommonFields&, const PolicyFunctions&, size_t);
#endif

}  // namespace container_internal
ABSL_NAMESPACE_END
}  // namespace absl

#undef ABSL_SWISSTABLE_ENABLE_GENERATIONS
#undef ABSL_SWISSTABLE_IGNORE_UNINITIALIZED
#undef ABSL_SWISSTABLE_IGNORE_UNINITIALIZED_RETURN
#undef ABSL_SWISSTABLE_ASSERT

#endif  // ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_