// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). // Various collation-related algorithms and constants in this file are // adapted from ICU4C and, therefore, are subject to the ICU license as // described in LICENSE. //! This module holds the 64-bit `CollationElement` struct used for //! the actual comparison, the 32-bit `CollationElement32` struct //! that's used for storage. (Strictly speaking, the storage is //! `RawBytesULE<4>`.) And the `CollationElements` iterator adapter //! that turns an iterator over `char` into an iterator over //! `CollationElement`. (To match the structure of ICU4C, this isn't //! a real Rust `Iterator`. Instead of signaling end by returning //! `None`, it signals end by returning `NO_CE`.) //! //! This module also declares various constants that are also used //! by the `comparison` module. use core::char::REPLACEMENT_CHARACTER; use core::marker::PhantomData; use icu_collections::char16trie::TrieResult; use icu_collections::codepointtrie::AbstractCodePointTrie; use icu_collections::codepointtrie::WithTrie; use icu_normalizer::provider::DecompositionTables; use icu_properties::props::CanonicalCombiningClass; use smallvec::SmallVec; use zerovec::ule::AsULE; use zerovec::ule::RawBytesULE; use zerovec::{zeroslice, ZeroSlice}; use crate::provider::CollationData; /// `true` iff `ce32`, when interpreted as `CollationElement32`, /// is self-contained. #[cfg(feature = "datagen")] pub fn is_self_contained(ce32: u32) -> bool { CollationElement32::new(ce32) .to_ce_self_contained() .is_some() } // Start `SmallVec` size constants. // // These are the on-stack buffer sizes. If the buffers need // to grow larger, they are spilled to the heap. // // TODO(#2005): Figure out good sizes for these. 
/// The number of full 64-bit collation units that get buffered
/// in the primary comparison loop so that they can be examined
/// by the subsequent comparison strengths.
///
/// Note 1: If a primary difference is found, the comparison
/// returns early, so these buffers end up holding all the
/// collation elements only if there is no primary difference.
///
/// Note 2: Unfortunately for now, a sentinel value signaling
/// the end of input gets written into the buffer in addition
/// to the real collation elements.
///
/// This should probably either be halved to 4 on the logic
/// that especially in the presence of the identical prefix
/// optimization, most comparisons return after a couple of
/// primary comparisons or increased to 32 on the logic that
/// such a buffer could better hold a file or human name that
/// differs on secondary or higher level.
pub(crate) const CE_BUFFER_SIZE: usize = 8;

/// The number of extra full 64-bit collation units that have
/// already been computed as part of an operation that yields
/// multiple collation units at a time.
const PENDING_CE_BUFFER_SIZE: usize = 6;

/// Either the identical prefix or the lookahead plus the next
/// upcoming character.
///
/// The longest contraction suffix in CLDR 40 is 7 characters long.
const UPCOMING_CHARACTER_BUFFER_SIZE: usize = 10;

/// The contiguous sequence of combining characters.
const COMBINING_CHARACTER_BUFFER_SIZE: usize = 7;

/// The sequence of digits in the numeric mode.
const DIGIT_BUFFER_SIZE: usize = 8;

/// The number of combining characters that a contraction has
/// matched.
const PENDING_REMOVALS_SIZE: usize = 1;

// End `SmallVec` constants

/// Marker that the decomposition does not round trip via NFC.
///
/// See components/normalizer/trie-value-format.md
pub(crate) const NON_ROUND_TRIP_MARKER: u32 = 1 << 30;

/// Marker that the first character of the decomposition
/// can combine backwards.
///
/// See components/normalizer/trie-value-format.md
pub(crate) const BACKWARD_COMBINING_MARKER: u32 = 1 << 31;

/// Mask for the bits that have to be zero for this to be a BMP
/// singleton decomposition, or value baked into the surrogate
/// range.
///
/// See components/normalizer/trie-value-format.md
pub(crate) const HIGH_ZEROS_MASK: u32 = 0x3FFF0000;

/// Mask for the bits that have to be zero for this to be a complex
/// decomposition.
///
/// See components/normalizer/trie-value-format.md
pub(crate) const LOW_ZEROS_MASK: u32 = 0xFFE0;

/// Marker value for U+FDFA in NFKD. (Unified with
/// `HANGUL_SYLLABLE_MARKER`, but they differ by
/// `NON_ROUND_TRIP_MARKER`.)
///
/// See components/normalizer/trie-value-format.md
const FDFA_MARKER: u16 = 1;

/// Marker value for Hangul syllables. (Unified with `FDFA_MARKER`,
/// but they differ by `NON_ROUND_TRIP_MARKER`.)
///
/// See components/normalizer/trie-value-format.md
pub(crate) const HANGUL_SYLLABLE_MARKER: u32 = 1;

/// Checks if a trie value carries a (non-zero) canonical
/// combining class.
///
/// See components/normalizer/trie-value-format.md
fn trie_value_has_ccc(trie_value: u32) -> bool {
    (trie_value & 0x3FFFFE00) == 0xD800
}

/// Checks if the trie signifies a special non-starter decomposition.
///
/// See components/normalizer/trie-value-format.md
fn trie_value_indicates_special_non_starter_decomposition(trie_value: u32) -> bool {
    (trie_value & 0x3FFFFF00) == 0xD900
}

/// Checks if a trie value signifies a character whose decomposition
/// starts with a non-starter.
///
/// See components/normalizer/trie-value-format.md
fn decomposition_starts_with_non_starter(trie_value: u32) -> bool {
    trie_value_has_ccc(trie_value)
}

/// Extracts a canonical combining class (possibly zero) from a trie value.
///
/// See components/normalizer/trie-value-format.md
fn ccc_from_trie_value(trie_value: u32) -> CanonicalCombiningClass {
    if trie_value_has_ccc(trie_value) {
        // The low 8 bits of a ccc-carrying trie value are the ccc itself.
        CanonicalCombiningClass::from_icu4c_value(trie_value as u8)
    } else {
        CanonicalCombiningClass::NotReordered
    }
}

// These constants originate from page 143 of Unicode 14.0
pub(crate) const HANGUL_S_BASE: u32 = 0xAC00;
pub(crate) const HANGUL_L_BASE: u32 = 0x1100;
pub(crate) const HANGUL_V_BASE: u32 = 0x1161;
pub(crate) const HANGUL_T_BASE: u32 = 0x11A7;
pub(crate) const HANGUL_T_COUNT: u32 = 28;
pub(crate) const HANGUL_N_COUNT: u32 = 588;
pub(crate) const HANGUL_S_COUNT: u32 = 11172;

pub(crate) const JAMO_COUNT: usize = 256; // 0x1200 - 0x1100

const COMBINING_DIACRITICS_BASE: usize = 0x0300;
const OPTIMIZED_DIACRITICS_LIMIT: usize = 0x034F;
pub(crate) const OPTIMIZED_DIACRITICS_MAX_COUNT: usize =
    OPTIMIZED_DIACRITICS_LIMIT - COMBINING_DIACRITICS_BASE;

pub(crate) const CASE_MASK: u16 = 0xC000;
pub(crate) const TERTIARY_MASK: u16 = 0x3F3F; // ONLY_TERTIARY_MASK in ICU4C
pub(crate) const QUATERNARY_MASK: u16 = 0xC0;

// A CE32 is special if its low byte is this or greater.
// Impossible case bits 11 mark special CE32s.
// This value itself is used to indicate a fallback to the root collation.
const SPECIAL_CE32_LOW_BYTE: u8 = 0xC0;
pub(crate) const FALLBACK_CE32: CollationElement32 =
    CollationElement32(SPECIAL_CE32_LOW_BYTE as u32);

const LONG_PRIMARY_CE32_LOW_BYTE: u8 = 0xC1; // SPECIAL_CE32_LOW_BYTE | LONG_PRIMARY_TAG

/// Used only as a placeholder on the identical prefix path.
/// The requirement is that this CE32 fails the quick mapping to a primary,
/// which it does, because the tag byte is higher than
/// `LONG_PRIMARY_CE32_LOW_BYTE`.
pub(crate) const IDENTICAL_PREFIX_HANGUL_MARKER_CE32: CollationElement32 =
    CollationElement32(0xC2);

const COMMON_SECONDARY_CE: u64 = 0x05000000;
const COMMON_TERTIARY_CE: u64 = 0x0500;
const COMMON_SEC_AND_TER_CE: u64 = COMMON_SECONDARY_CE | COMMON_TERTIARY_CE;

const UNASSIGNED_IMPLICIT_BYTE: u8 = 0xFE;

// /// Set if there is no match for the single (no-suffix) character itself.
// /// This is only possible if there is a prefix.
// /// In this case, discontiguous contraction matching cannot add combining marks
// /// starting from an empty suffix.
// /// The default CE32 is used anyway if there is no suffix match.
// const CONTRACT_SINGLE_CP_NO_MATCH: u32 = 0x100;

/// Set if the first character of every contraction suffix has lccc!=0.
const CONTRACT_NEXT_CCC: u32 = 0x200;
/// Set if any contraction suffix ends with lccc!=0.
const CONTRACT_TRAILING_CCC: u32 = 0x400;
/// Set if at least one contraction suffix contains a starter.
const CONTRACT_HAS_STARTER: u32 = 0x800;

// const NO_CE32: CollationElement32 = CollationElement32::default();

// constants named NO_CE* : End of input. Only used in runtime code, not stored in data.
pub(crate) const NO_CE: CollationElement = CollationElement::default(); pub(crate) const NO_CE_PRIMARY: u32 = 1; // not a left-adjusted weight // const NO_CE_NON_PRIMARY: NonPrimary = NonPrimary::default(); pub(crate) const NO_CE_SECONDARY: u16 = 0x0100; pub(crate) const NO_CE_TERTIARY: u16 = 0x0100; pub(crate) const NO_CE_QUATERNARY: u16 = 0x0100; const NO_CE_VALUE: u64 = ((NO_CE_PRIMARY as u64) << 32) | ((NO_CE_SECONDARY as u64) << 16) | (NO_CE_TERTIARY as u64); // 0x101000100 // See ICU4C collation.h and https://www.unicode.org/reports/tr10/#Trailing_Weights pub(crate) const FFFD_PRIMARY: u32 = 0xFFFD0000; // U+FFFD pub(crate) const FFFD_CE_VALUE: u64 = ((FFFD_PRIMARY as u64) << 32) | COMMON_SEC_AND_TER_CE; pub(crate) const FFFD_CE: CollationElement = CollationElement(FFFD_CE_VALUE); pub(crate) const FFFD_CE32_VALUE: u32 = 0xFFFD0505; pub(crate) const FFFD_CE32: CollationElement32 = CollationElement32(FFFD_CE32_VALUE); pub(crate) const EMPTY_U16: &ZeroSlice = zeroslice![]; const SINGLE_REPLACEMENT_CHARACTER_U16: &ZeroSlice = zeroslice!(u16; ::ULE::from_unsigned; [REPLACEMENT_CHARACTER as u16]); pub(crate) const EMPTY_CHAR: &ZeroSlice = zeroslice![]; const SINGLE_REPLACEMENT_CHARACTER_CHAR: &ZeroSlice = zeroslice!(char; ::ULE::from_aligned; [REPLACEMENT_CHARACTER]); /// If `opt` is `Some`, unwrap it. If `None`, panic if debug assertions /// are enabled and return `default` if debug assertions are not enabled. /// /// Use this only if the only reason why `opt` could be `None` is bogus /// data from the provider. #[inline(always)] pub(crate) fn unwrap_or_gigo(opt: Option, default: T) -> T { if let Some(val) = opt { val } else { // GIGO case debug_assert!(false); default } } /// Convert a `u32` _obtained from data provider data_ to `char`. #[inline(always)] pub(crate) fn char_from_u32(u: u32) -> char { unwrap_or_gigo(core::char::from_u32(u), REPLACEMENT_CHARACTER) } /// Convert a `u16` _obtained from data provider data_ to `char`. 
#[inline(always)]
fn char_from_u16(u: u16) -> char {
    char_from_u32(u32::from(u))
}

/// `true` iff `c` is in the inclusive range `start..=end`.
///
/// The `wrapping_sub` makes characters below `start` wrap to huge
/// values, so a single comparison covers both bounds.
#[inline(always)]
fn in_inclusive_range(c: char, start: char, end: char) -> bool {
    u32::from(c).wrapping_sub(u32::from(start)) <= (u32::from(end) - u32::from(start))
}

/// Special-CE32 tags, from bits 3..0 of a special 32-bit CE.
/// Bits 31..8 are available for tag-specific data.
/// Bits 5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0.
#[derive(Eq, PartialEq, Debug)]
#[allow(dead_code)]
#[repr(u8)] // This repr is necessary for transmute safety
pub(crate) enum Tag {
    /// Fall back to the base collator.
    /// This is the tag value in [`SPECIAL_CE32_LOW_BYTE`] and [`FALLBACK_CE32`].
    /// Bits 31..8: Unused, 0.
    Fallback = 0,
    /// Long-primary CE with [`COMMON_SEC_AND_TER_CE`].
    /// Bits 31..8: Three-byte primary.
    LongPrimary = 1,
    /// Long-secondary CE with zero primary.
    /// Bits 31..16: Secondary weight.
    /// Bits 15.. 8: Tertiary weight.
    LongSecondary = 2,
    /// Unused.
    /// May be used in the future for single-byte secondary CEs (`SHORT_SECONDARY_TAG`),
    /// storing the secondary in bits 31..24, the ccc in bits 23..16,
    /// and the tertiary in bits 15..8.
    Reserved3 = 3,
    /// Latin mini expansions of two simple CEs [pp, 05, tt] [00, ss, 05].
    /// Bits 31..24: Single-byte primary weight pp of the first CE.
    /// Bits 23..16: Tertiary weight tt of the first CE.
    /// Bits 15.. 8: Secondary weight ss of the second CE.
    /// Unused by ICU4X, may get repurposed for jamo expansions in Korean search.
    LatinExpansion = 4,
    /// Points to one or more simple/long-primary/long-secondary 32-bit CE32s.
    /// Bits 31..13: Index into `uint32_t` table.
    /// Bits 12.. 8: Length=1..31.
    Expansion32 = 5,
    /// Points to one or more 64-bit CEs.
    /// Bits 31..13: Index into CE table.
    /// Bits 12.. 8: Length=1..31.
    Expansion = 6,
    /// Builder data, used only in the `CollationDataBuilder`, not in runtime data.
    ///
    /// If bit 8 is 0: Builder context, points to a list of context-sensitive mappings.
/// Bits 31..13: Index to the builder's list of `ConditionalCE32` for this character. /// Bits 12.. 9: Unused, 0. /// /// If bit 8 is 1 (`IS_BUILDER_JAMO_CE32`): Builder-only jamoCE32 value. /// The builder fetches the Jamo CE32 from the trie. /// Bits 31..13: Jamo code point. /// Bits 12.. 9: Unused, 0. BuilderData = 7, /// Points to prefix trie. /// Bits 31..13: Index into prefix/contraction data. /// Bits 12.. 8: Unused, 0. Prefix = 8, /// Points to contraction data. /// Bits 31..13: Index into prefix/contraction data. /// Bits 12..11: Unused, 0. /// Bit 10: `CONTRACT_TRAILING_CCC` flag. /// Bit 9: `CONTRACT_NEXT_CCC` flag. /// Bit 8: `CONTRACT_SINGLE_CP_NO_MATCH` flag. Contraction = 9, /// Decimal digit. /// Bits 31..13: Index into `uint32_t` table for non-numeric-collation CE32. /// Bit 12: Unused, 0. /// Bits 11.. 8: Digit value 0..9. Digit = 10, /// Tag for U+0000, for moving the NUL-termination handling /// from the regular fastpath into specials-handling code. /// Bits 31..8: Unused, 0. /// Not used by ICU4X. U0000 = 11, /// Tag for a Hangul syllable. /// Bits 31..9: Unused, 0. /// Bit 8: `HANGUL_NO_SPECIAL_JAMO` flag. /// Not used by ICU4X, may get reused for compressing Hanja expansions. Hangul = 12, /// Tag for a lead surrogate code unit. /// Optional optimization for UTF-16 string processing. /// Bits 31..10: Unused, 0. /// 9.. 8: =0: All associated supplementary code points are unassigned-implicit. /// =1: All associated supplementary code points fall back to the base data. /// else: (Normally 2) Look up the data for the supplementary code point. /// Not used by ICU4X. LeadSurrogate = 13, /// Tag for CEs with primary weights in code point order. /// Bits 31..13: Index into CE table, for one data "CE". /// Bits 12.. 8: Unused, 0. /// /// This data "CE" has the following bit fields: /// Bits 63..32: Three-byte primary pppppp00. /// 31.. 8: Start/base code point of the in-order range. /// 7: Flag isCompressible primary. /// 6.. 
0: Per-code point primary-weight increment. Offset = 14, /// Implicit CE tag. Compute an unassigned-implicit CE. /// All bits are set (`UNASSIGNED_CE32=0xffffffff`). Implicit = 15, } /// A compressed form of a collation element as stored in the collation /// data. /// /// A `CollationElement32` can be "normal" or "special". /// Bits 7 and 6 are case bits for the "normal" case and setting /// both is an impossible case bit combination. Hence, "special" /// `CollationElement32`s are marked by setting both case bits /// to 1. This is equivalent with the low byte being less than /// `SPECIAL_CE32_LOW_BYTE` (0xC0, i.e. 0b11000000) in the "normal" /// case and equal to or greater in the "special" case. /// /// For the normal case: /// Bits: 31..16: Primary weight /// Bits: 15..8: Secondary weight /// Bits: 7..6: Case bits (cannot both be 1 simultaneously) /// Bits: 5..0: The high part of the discontiguous tertiary weight /// (The quaternary weight and the low part of the discontiguous /// tertiary weight are zero.) /// /// For the special case: /// Bits 31..8: tag-specific; see the documentation for `Tag`. /// Bits 7..6: The specialness marker; both bits set to 1 /// Bits 5..4: Reserved. May be used in the future to indicate lccc!=0 and tccc!=0. /// Bits 3..0: the tag (bit-compatible with `Tag`) #[derive(Copy, Clone, PartialEq, Debug)] pub(crate) struct CollationElement32(u32); impl CollationElement32 { #[inline(always)] pub fn new(bits: u32) -> Self { CollationElement32(bits) } #[inline(always)] pub fn new_from_ule(ule: RawBytesULE<4>) -> Self { CollationElement32(u32::from_unaligned(ule)) } #[inline(always)] fn low_byte(self) -> u8 { self.0 as u8 } #[inline(always)] pub(crate) fn tag_checked(self) -> Option { let t = self.low_byte(); if t < SPECIAL_CE32_LOW_BYTE { None } else { Some(self.tag()) } } /// Returns the tag if this element is special. 
/// Non-specialness should first be checked by seeing if either /// `to_ce_simple_or_long_primary()` or `to_ce_self_contained()` /// returns non-`None`. /// /// # Panics /// /// Panics in debug mode if called on a non-special element. #[inline(always)] pub(crate) fn tag(self) -> Tag { debug_assert!(self.low_byte() >= SPECIAL_CE32_LOW_BYTE); // Safety: Tag has values 0 to 15, which are filtered for with the 0xF mask. unsafe { core::mem::transmute(self.low_byte() & 0xF) } } /// Simplest possible check for the Latin1 fast path. #[cfg(feature = "latin1")] #[inline(always)] pub fn to_primary_simple(self) -> Option { let t = self.low_byte(); if t < SPECIAL_CE32_LOW_BYTE { // Not special Some(self.0 & 0xFFFF0000) } else { None } } /// Extract only the first primary in the quick check without identical /// prefix. #[inline(always)] pub fn to_primary_in_quick_check(self, data: &CollationData) -> Option { let t = self.low_byte(); if t < SPECIAL_CE32_LOW_BYTE { // Not special Some(self.0 & 0xFFFF0000) } else if t == LONG_PRIMARY_CE32_LOW_BYTE { Some(self.0 - u32::from(t)) } else { let tag = self.tag(); if tag == Tag::Expansion { // Hiragana in `ja` tailoring Some(data.get_primary_from_ces(self.index())) } else { None } // Note: If we start adding support for more tags, // we should probably do early exits for contractions // and potential Hangul syllables before checking // for expansion. } } /// Expands to 64 bits if the expansion is to a single 64-bit collation /// element and is not a long-secondary expansion. 
#[inline(always)] pub fn to_ce_simple_or_long_primary(self) -> Option { let t = self.low_byte(); if t < SPECIAL_CE32_LOW_BYTE { // Not special let as64 = u64::from(self.0); Some(CollationElement::new( ((as64 & 0xFFFF0000) << 32) | ((as64 & 0xFF00) << 16) | (u64::from(t) << 8), )) } else if t == LONG_PRIMARY_CE32_LOW_BYTE { let as64 = u64::from(self.0); Some(CollationElement::new( ((as64 - u64::from(t)) << 32) | COMMON_SEC_AND_TER_CE, )) } else { // Could still be long secondary (or not self-contained at all). // See `to_ce_self_contained()`. None } } /// Expands to 64 bits if the expansion is to a single 64-bit collation /// element. #[inline(always)] pub fn to_ce_self_contained(self) -> Option { if let Some(ce) = self.to_ce_simple_or_long_primary() { return Some(ce); } if self.tag() == Tag::LongSecondary { Some(CollationElement::new(u64::from(self.0 & 0xffffff00))) } else { None } } /// Expands to 64 bits if the expansion is to a single 64-bit collation /// element or otherwise returns the collation element for U+FFFD. #[inline(always)] pub fn to_ce_self_contained_or_gigo(self) -> CollationElement { unwrap_or_gigo(self.to_ce_self_contained(), FFFD_CE) } /// Gets the length from this element. /// /// # Panics /// /// In debug builds if this element doesn't have a length. #[inline(always)] pub fn len(self) -> usize { debug_assert!(self.tag() == Tag::Expansion32 || self.tag() == Tag::Expansion); ((self.0 >> 8) & 31) as usize } /// Gets the index from this element. /// /// # Panics /// /// In debug builds if this element doesn't have an index. 
    #[inline(always)]
    pub fn index(self) -> usize {
        debug_assert!(
            self.tag() == Tag::Expansion32
                || self.tag() == Tag::Expansion
                || self.tag() == Tag::Contraction
                || self.tag() == Tag::Digit
                || self.tag() == Tag::Prefix
                || self.tag() == Tag::Offset
        );
        (self.0 >> 13) as usize
    }

    /// The digit value (0..9) of a `Tag::Digit` element.
    #[inline(always)]
    pub fn digit(self) -> u8 {
        debug_assert!(self.tag() == Tag::Digit);
        ((self.0 >> 8) & 0xF) as u8
    }

    /// `CONTRACT_NEXT_CCC`: the first character of every contraction
    /// suffix is a combining character.
    #[inline(always)]
    pub fn every_suffix_starts_with_combining(self) -> bool {
        debug_assert!(self.tag() == Tag::Contraction);
        (self.0 & CONTRACT_NEXT_CCC) != 0
    }

    /// `CONTRACT_HAS_STARTER`: at least one contraction suffix contains a starter.
    #[inline(always)]
    pub fn at_least_one_suffix_contains_starter(self) -> bool {
        debug_assert!(self.tag() == Tag::Contraction);
        (self.0 & CONTRACT_HAS_STARTER) != 0
    }

    /// `CONTRACT_TRAILING_CCC`: at least one contraction suffix ends with a non-starter.
    #[inline(always)]
    pub fn at_least_one_suffix_ends_with_non_starter(self) -> bool {
        debug_assert!(self.tag() == Tag::Contraction);
        (self.0 & CONTRACT_TRAILING_CCC) != 0
    }
}

impl Default for CollationElement32 {
    fn default() -> Self {
        CollationElement32(1) // NO_CE32
    }
}

/// A collation element is a 64-bit value.
///
/// Bits 63..32 are the primary weight.
/// Bits 31..16 are the secondary weight.
/// Bits 15..14 are the case bits.
/// Bits 13..8 and 5..0 are the (bitwise discontiguous) tertiary weight.
/// Bits 7..6 are the quaternary weight.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct CollationElement(u64);

impl CollationElement {
    #[inline(always)]
    pub fn new(bits: u64) -> Self {
        CollationElement(bits)
    }

    /// Builds a CE from a primary weight with the common secondary and tertiary weights.
    #[inline(always)]
    pub fn new_from_primary(primary: u32) -> Self {
        CollationElement((u64::from(primary) << 32) | COMMON_SEC_AND_TER_CE)
    }

    /// Builds a zero-primary CE from a secondary weight with the common tertiary weight.
    #[inline(always)]
    pub fn new_from_secondary(secondary: u16) -> Self {
        CollationElement((u64::from(secondary) << 16) | COMMON_TERTIARY_CE)
    }

    #[inline(always)]
    pub fn new_implicit_from_char(c: char) -> Self {
        // Collation::unassignedPrimaryFromCodePoint
        // Create a gap before U+0000. Use c-1 for [first unassigned].
        let mut c_with_offset = u32::from(c) + 1;
        // Fourth byte: 18 values, every 14th byte value (gap of 13).
        let mut primary: u32 = 2 + (c_with_offset % 18) * 14;
        c_with_offset /= 18;
        // Third byte: 254 values
        primary |= (2 + (c_with_offset % 254)) << 8;
        c_with_offset /= 254;
        // Second byte: 251 values 04..FE excluding the primary compression bytes.
        primary |= (4 + (c_with_offset % 251)) << 16;
        // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18).
        primary |= u32::from(UNASSIGNED_IMPLICIT_BYTE) << 24;
        CollationElement::new_from_primary(primary)
    }

    /// Keeps only the primary weight, zeroing all the lower weights.
    #[inline(always)]
    pub fn clone_with_non_primary_zeroed(self) -> Self {
        CollationElement(self.0 & 0xFFFFFFFF00000000)
    }

    /// Get the primary weight
    #[inline(always)]
    pub fn primary(self) -> u32 {
        (self.0 >> 32) as u32
    }

    /// Get the non-primary weights
    #[inline(always)]
    pub fn non_primary(self) -> NonPrimary {
        NonPrimary::new(self.0 as u32)
    }

    /// Get the secondary weight
    #[inline(always)]
    pub fn secondary(self) -> u16 {
        self.non_primary().secondary()
    }

    #[inline(always)]
    pub fn quaternary(self) -> u32 {
        self.non_primary().quaternary()
    }

    #[inline(always)]
    pub fn tertiary_ignorable(self) -> bool {
        self.non_primary().tertiary_ignorable()
    }

    /// `true` iff the primary half or the non-primary half is all zeros.
    #[inline(always)]
    pub fn either_half_zero(self) -> bool {
        self.primary() == 0 || (self.0 as u32) == 0
    }

    // `const` counterpart of the `Default` impl, usable in constant contexts.
    #[inline(always)]
    pub const fn default() -> CollationElement {
        CollationElement(NO_CE_VALUE) // NO_CE
    }
}

impl Default for CollationElement {
    #[inline(always)]
    fn default() -> Self {
        CollationElement(NO_CE_VALUE) // NO_CE
    }
}

impl Default for &CollationElement {
    #[inline(always)]
    fn default() -> Self {
        &CollationElement(NO_CE_VALUE) // NO_CE
    }
}

/// The purpose of grouping the non-primary bits
/// into a struct is to allow for a future optimization
/// that specializes code over whether storage for primary
/// weights is needed or not. (I.e. whether to specialize
/// on `CollationElement` or `NonPrimary`.)
#[derive(Copy, Clone, PartialEq, Debug)] pub(crate) struct NonPrimary(u32); impl NonPrimary { /// Constructor pub fn new(bits: u32) -> Self { NonPrimary(bits) } /// Get the bits pub fn bits(self) -> u32 { self.0 } /// Get the secondary weight #[inline(always)] pub fn secondary(self) -> u16 { (self.0 >> 16) as u16 } /// Get the case bits as the high two bits of a u16 #[inline(always)] pub fn case(self) -> u16 { (self.0 as u16) & CASE_MASK } /// Get the tertiary weight as u16 with the high /// two bits of each half zeroed. #[inline(always)] pub fn tertiary(self) -> u16 { (self.0 as u16) & TERTIARY_MASK } #[inline(always)] pub fn tertiary_ignorable(self) -> bool { (self.0 as u16) <= NO_CE_TERTIARY } /// Get the quaternary weight in the original /// storage bit positions with the other bits /// set to one. #[inline(always)] pub fn quaternary(self) -> u32 { self.0 | !(QUATERNARY_MASK as u32) } /// Get any combination of tertiary, case, and quaternary /// by mask. #[inline(always)] pub fn tertiary_case_quarternary(self, mask: u16) -> u16 { debug_assert!((mask & CASE_MASK) == CASE_MASK || (mask & CASE_MASK) == 0); debug_assert!((mask & TERTIARY_MASK) == TERTIARY_MASK || (mask & TERTIARY_MASK) == 0); debug_assert!((mask & QUATERNARY_MASK) == QUATERNARY_MASK || (mask & QUATERNARY_MASK) == 0); (self.0 as u16) & mask } #[inline(always)] pub fn case_quaternary(self) -> u16 { (self.0 as u16) & (CASE_MASK | QUATERNARY_MASK) } #[inline(always)] pub fn ignorable(self) -> bool { self.0 == 0 } } impl Default for NonPrimary { #[inline(always)] fn default() -> Self { NonPrimary(0x01000100) // Low 32 bits of NO_CE } } /// This struct makes the handling of the `upcoming` buffer /// easily so that trie lookups are done at most once. However, /// when `upcoming[0]` is an undecomposed starter, we don't /// need the ccc yet, and when lookahead has already done the /// trie lookups, we don't need `trie_value`, as it is implied /// by ccc. 
//
// TODO(#2386): This struct carries redundant information, and
// `upcoming` should be split into a buffer of `CharacterAndClass`
// and an `Option`, but that refactoring
// isn't 100% necessary, so focusing on data format stability
// for 1.0.
//
// (Deliberately non-`Copy`, because `CharacterAndClass` is
// non-`Copy`.)
#[derive(Debug, Clone)]
pub(crate) struct CharacterAndClassAndTrieValue {
    c_and_c: CharacterAndClass,
    pub trie_val: u32,
}

impl CharacterAndClassAndTrieValue {
    /// A character known not to decompose, with ccc 0 and a zero trie value.
    pub fn new_with_non_decomposing_starter(c: char) -> Self {
        CharacterAndClassAndTrieValue {
            c_and_c: CharacterAndClass::new(c, CanonicalCombiningClass::NotReordered),
            trie_val: 0,
        }
    }
    /// A non-starter with a known non-zero ccc; synthesizes the
    /// corresponding ccc-carrying trie value (0xD800 | ccc).
    pub fn new_with_non_zero_ccc(c: char, ccc: CanonicalCombiningClass) -> Self {
        CharacterAndClassAndTrieValue {
            c_and_c: CharacterAndClass::new(c, ccc),
            trie_val: 0xD800 | u32::from(ccc.to_icu4c_value()),
        }
    }
    /// Wraps a character whose trie value is known not to indicate a
    /// special non-starter decomposition (checked in debug builds).
    pub fn new_with_non_special_decomposition_trie_val(c: char, trie_val: u32) -> Self {
        debug_assert!(!trie_value_indicates_special_non_starter_decomposition(
            trie_val
        ));
        CharacterAndClassAndTrieValue {
            c_and_c: CharacterAndClass::new_with_trie_value(c, trie_val),
            trie_val,
        }
    }
    /// Wraps a character with an arbitrary trie value; the ccc of a
    /// special non-starter decomposition is left as the 0xFF placeholder.
    pub fn new_with_trie_val(c: char, trie_val: u32) -> Self {
        if !trie_value_indicates_special_non_starter_decomposition(trie_val) {
            CharacterAndClassAndTrieValue {
                c_and_c: CharacterAndClass::new_with_trie_value(c, trie_val),
                trie_val,
            }
        } else {
            CharacterAndClassAndTrieValue {
                c_and_c: CharacterAndClass::new(c, CanonicalCombiningClass::from_icu4c_value(0xFF)),
                trie_val,
            }
        }
    }

    pub fn decomposition_starts_with_non_starter(&self) -> bool {
        decomposition_starts_with_non_starter(self.trie_val)
    }

    pub fn character(&self) -> char {
        self.c_and_c.character()
    }

    // Debug-asserts that the 0xFF placeholder ccc is never observed here.
    fn ccc(&self) -> CanonicalCombiningClass {
        let ret = self.c_and_c.ccc();
        debug_assert_ne!(ret, CanonicalCombiningClass::from_icu4c_value(0xFF));
        ret
    }
}

/// Pack a `char` and a `CanonicalCombiningClass` in
/// 32 bits (the former in the lower 24 bits and the
/// latter in the high 8 bits). The latter can be
/// initialized to 0xFF upon creation, in which case
/// it can be actually set later by calling
/// `set_ccc_from_trie_if_not_already_set`. This is
/// a micro optimization to avoid the Canonical
/// Combining Class trie lookup when there is only
/// one combining character in a sequence. This type
/// is intentionally non-`Copy` to get compiler help
/// in making sure that the class is set on the
/// instance on which it is intended to be set
/// and not on a temporary copy.
///
/// Note that 0xFF won't be assigned to an actual
/// canonical combining class per definition D104
/// in The Unicode Standard.
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Debug, Clone)]
// Safety invariant: The low 24 bits are a valid char
struct CharacterAndClass(u32);

impl CharacterAndClass {
    pub fn new(c: char, ccc: CanonicalCombiningClass) -> Self {
        // Safety invariant upheld here: the first half is a valid char
        // and the second half does not affect the low 24 bits
        CharacterAndClass(u32::from(c) | (u32::from(ccc.to_icu4c_value()) << 24))
    }
    pub fn new_with_placeholder(c: char) -> Self {
        // Safety invariant upheld here: the first half is a valid char
        // and the second half does not affect the low 24 bits
        CharacterAndClass(u32::from(c) | ((0xFF) << 24))
    }
    pub fn new_with_trie_value(c: char, trie_value: u32) -> Self {
        Self::new(c, ccc_from_trie_value(trie_value))
    }
    pub fn character(&self) -> char {
        // Safety: from the safety invariant, this extracts the low 24 bits
        unsafe { char::from_u32_unchecked(self.0 & 0xFF_FFFF) }
    }
    pub fn ccc(&self) -> CanonicalCombiningClass {
        // Safety invariant upheld here: The argument is outside of the low 24 bits,
        // and \0 is a valid character
        CanonicalCombiningClass::from_icu4c_value((self.0 >> 24) as u8)
    }
    pub fn character_and_ccc(&self) -> (char, CanonicalCombiningClass) {
        (self.character(), self.ccc())
    }

    /// Looks up the ccc from `trie` only if it is still the 0xFF
    /// placeholder; a no-op if the ccc has already been set.
    pub fn set_ccc_from_trie_if_not_already_set<'data, T: AbstractCodePointTrie<'data, u32>>(
        &mut self,
        trie: &T,
    ) {
        if self.0 >> 24 != 0xFF {
            return;
        }
        let scalar = self.character();
        // Safety invariant upheld here: The first half doesn't affect the lower 24 bits,
        // and the second half was taken from the old `self` which had these invariants upheld already.
        self.0 = ((ccc_from_trie_value(trie.scalar(scalar)).to_icu4c_value() as u32) << 24)
            | u32::from(scalar);
    }
}

/// Iterator that transforms an iterator over `char` into an iterator
/// over `CollationElement` with a tailoring.
/// Not a real Rust iterator: Instead of `None` uses `NO_CE` to indicate
/// end of iteration to optimize comparison.
///
/// It is _extremely_ important for performance that `SmallVec`s not be
/// moved. To facilitate move-avoidance, this struct has the following
/// life cycle where `new` returns the struct in a state that is not
/// yet valid for a `next` call until `init` is called:
///
/// 1. `new`.
/// 2. Some number of calls to `iter_next_before_init` and
///    `prepend_upcoming_before_init`.
/// 3. `init`.
/// 4. Some number of calls to `next`.
pub(crate) struct CollationElements<'data, I, T>
where
    I: Iterator + WithTrie<'data, T, u32>,
    T: AbstractCodePointTrie<'data, u32>,
{
    /// See components/normalizer/trie-value-format.md for the trie wrapped in `iter`.
    iter: I,
    /// Already computed but not yet returned `CollationElement`s.
    pending: SmallVec<[CollationElement; PENDING_CE_BUFFER_SIZE]>, // TODO(#2005): Figure out good length
    /// The index of the next item to be returned from `pending`. The purpose
    /// of this index is to avoid moving the rest of the items.
pending_pos: usize, /// The characters most previously seen (or never-matching placeholders) /// CLDR, as of 40, has two kinds of prefixes: /// Prefixes that contain a single starter /// Prefixes that contain a starter followed by either U+3099 or U+309A /// Last-pushed is at index 0 and previously-pushed at index 1 prefix: [char; 2], /// `upcoming` holds the characters that have already been read from /// `iter` but haven't yet been mapped to `CollationElement`s. /// /// Typically, `upcoming` holds one character and corresponds semantically /// to `pending_unnormalized_starter` in `icu::normalizer::Decomposition`. /// This is why there isn't a move avoidance optimization similar to /// `pending_pos` above for this buffer. A complex decomposition, a /// Hangul syllable followed by a non-starter, or lookahead can cause /// `pending` to hold more than one `char`. /// /// Invariant: `upcoming` is allowed to become empty only after `iter` /// has been exhausted. /// /// Invariant: (Checked by `debug_assert!`) At the start of `next()` call, /// if `upcoming` isn't empty (with `iter` having been exhausted), the /// first `char` in `upcoming` must have its decomposition start with a /// starter. /// /// TODO: Reverse the order, since now `insert(0, x)` and `remove(0)` /// are used more often than `push()` and `pop()`. upcoming: SmallVec<[CharacterAndClassAndTrieValue; UPCOMING_CHARACTER_BUFFER_SIZE]>, /// The root collation data. root: &'data CollationData<'data>, /// Tailoring if applicable. tailoring: &'data CollationData<'data>, /// The `CollationElement32` mapping for the Hangul Jamo block. /// /// Note: in ICU4C the jamo table contains only modern jamo. Here, the jamo table contains the whole Unicode block. jamo: &'data [::ULE; JAMO_COUNT], /// The `CollationElement32` mapping for the Combining Diacritical Marks block. 
// NOTE(review): several element-type parameters on this struct's remaining
// fields (`ZeroSlice`, `Option`, `PhantomData`) appear to have been stripped
// in extraction — TODO confirm the concrete types against the repository.
diacritics: &'data ZeroSlice,
    /// NFD complex decompositions on the BMP
    scalars16: &'data ZeroSlice,
    /// NFD complex decompositions on supplementary planes
    scalars32: &'data ZeroSlice,
    /// If numeric mode is enabled, the 8 high bits of the numeric primary.
    /// `None` if disabled.
    numeric_primary: Option,
    /// Whether the Lithuanian combining dot above handling is enabled.
    lithuanian_dot_above: bool,
    /// Whether `upcoming` (except the last item) has been normalized already
    upcoming_normalized: bool,
    #[cfg(debug_assertions)]
    /// Whether `iter` has been exhausted
    iter_exhausted: bool,
    #[cfg(debug_assertions)]
    /// Whether `init` has been called
    initialized: bool,
    _phantom: PhantomData,
}

impl<'data, I, T> CollationElements<'data, I, T>
where
    I: Iterator + WithTrie<'data, T, u32> + 'data,
    T: AbstractCodePointTrie<'data, u32> + 'data,
{
    /// Constructs a `CollationElements` in the not-yet-`init`ed state
    /// (see the struct-level life-cycle documentation).
    #[expect(clippy::too_many_arguments)]
    pub fn new(
        delegate: I,
        root: &'data CollationData,
        tailoring: &'data CollationData,
        jamo: &'data [::ULE; JAMO_COUNT],
        diacritics: &'data ZeroSlice,
        tables: &'data DecompositionTables,
        numeric_primary: Option,
        lithuanian_dot_above: bool,
    ) -> Self {
        CollationElements:: {
            iter: delegate,
            pending: SmallVec::new(),
            pending_pos: 0,
            // U+FFFF never matches a prefix, so this means "no prefix seen".
            prefix: ['\u{FFFF}'; 2],
            upcoming: SmallVec::new(),
            root,
            tailoring,
            jamo,
            diacritics,
            scalars16: &tables.scalars16,
            // The supplementary-plane decompositions come from the
            // normalizer's `scalars24` table despite this field's name.
            scalars32: &tables.scalars24,
            numeric_primary,
            lithuanian_dot_above,
            upcoming_normalized: false,
            #[cfg(debug_assertions)]
            iter_exhausted: false,
            #[cfg(debug_assertions)]
            initialized: false,
            _phantom: PhantomData,
        }
    }

    /// Pulls the next character from the delegate iterator; only valid
    /// before `init` has been called (used by the identical-prefix code).
    pub fn iter_next_before_init(&mut self) -> Option {
        #[cfg(debug_assertions)]
        debug_assert!(!self.initialized);
        self.iter_next()
    }

    /// Pushes a character back to the front of `upcoming`; only valid
    /// before `init` has been called.
    pub fn prepend_upcoming_before_init(&mut self, c: CharacterAndClassAndTrieValue) {
        #[cfg(debug_assertions)]
        debug_assert!(!self.initialized);
        self.upcoming.insert(0, c);
    }

    /// Transitions the struct into the state where `next` may be called,
    /// establishing the `upcoming` starter invariants.
    pub fn init(&mut self) {
        // TODO: Consider removing the invariant that this method upholds.
#[cfg(debug_assertions)]
{
    debug_assert!(!self.initialized);
    self.initialized = true;
}
loop {
    // Ensure the last item is a starter (unless iter exhausted).
    if let Some(last) = self.upcoming.last() {
        if last.decomposition_starts_with_non_starter() {
            // Not using `while let` to be able to set `iter_exhausted`
            loop {
                if let Some(ch) = self.iter_next() {
                    let starter = !ch.decomposition_starts_with_non_starter();
                    self.upcoming.push(ch);
                    if starter {
                        break;
                    }
                } else {
                    #[cfg(debug_assertions)]
                    {
                        self.iter_exhausted = true;
                    }
                    break;
                }
            }
        }
        // If the buffer already begins with a starter, the invariant holds.
        if let Some(first) = self.upcoming.first() {
            if !first.decomposition_starts_with_non_starter() {
                return;
            }
        }
    } else {
        // Ensure that `upcoming` starts with a starter in the case where
        // we get here with an empty `upcoming` due to the identical prefix
        // code exiting right away, because the very first code units differ.
        if let Some(ch) = self.iter_next() {
            let starter = !ch.decomposition_starts_with_non_starter();
            self.upcoming.push(ch);
            if starter {
                return;
            }
            // Loop back to uphold the invariant that `upcoming` ends with
            // a character whose decomposition starts with a starter unless
            // the iterator has been exhausted.
            continue;
        } else {
            #[cfg(debug_assertions)]
            {
                self.iter_exhausted = true;
            }
            return;
        }
    }
    break;
}
// The case where upcoming does not start with a starter.
// Ideally, we'd have something more specialized here that would extract
// the code path that `self.next()` runs after dealing with the U+0000.
self.upcoming.insert(
    0,
    CharacterAndClassAndTrieValue::new_with_non_decomposing_starter('\u{0}'),
);
// Make sure the process always begins with a starter
let _ = self.next(); // Remove the placeholder starter
}

/// Wraps the delegate iterator's `(char, trie value)` pairs into
/// `CharacterAndClassAndTrieValue` items.
fn iter_next(&mut self) -> Option {
    let (c, trie_val) = self.iter.next()?;
    Some(CharacterAndClassAndTrieValue::new_with_trie_val(
        c, trie_val,
    ))
}

/// Pops the front of `upcoming`, refilling it from `iter` so that the
/// "`upcoming` only empty after `iter` is exhausted" invariant holds.
fn next_internal(&mut self) -> Option {
    if self.upcoming.is_empty() {
        return None;
    }
    // TODO: something more efficient here.
// `remove(0)` is O(n), but `upcoming` is typically a single item; see the
// TODO above about reversing the buffer order.
let ret = self.upcoming.remove(0);
if self.upcoming.is_empty() {
    if let Some(c) = self.iter_next() {
        self.upcoming.push(c);
    } else {
        #[cfg(debug_assertions)]
        {
            self.iter_exhausted = true;
        }
    }
}
Some(ret)
}

/// If `upcoming` holds exactly one character whose decomposition starts
/// with a non-starter, decomposes it and pulls in the whole following
/// run of combining characters (plus the next starter, if any).
fn maybe_gather_combining(&mut self) {
    if self.upcoming.len() != 1 {
        return;
    }
    // index has to be in range due to the check above.
    // rewriting with `get()` would result in two checks.
    #[expect(clippy::indexing_slicing)]
    if !self.upcoming[0].decomposition_starts_with_non_starter() {
        return;
    }
    // We now have a single character that decomposes to start with
    // a non-starter. Decompose it and assign the real canonical combining class.
    let first = self.upcoming.remove(0);
    self.push_decomposed_combining(first);
    // Not using `while let` to be able to set `iter_exhausted`
    loop {
        if let Some(ch) = self.iter_next() {
            if ch.decomposition_starts_with_non_starter() {
                self.push_decomposed_combining(ch);
            } else {
                // Got a new starter
                self.upcoming.push(ch);
                break;
            }
        } else {
            #[cfg(debug_assertions)]
            {
                self.iter_exhausted = true;
            }
            break;
        }
    }
}

/// Ensures that `upcoming` is normalized to NFD, except:
/// 1. When the last item is a starter, it isn't necessarily normalized.
/// 2. Hangul syllable are unnormalized.
fn ensure_upcoming_normalized(&mut self) {
    if self.upcoming_normalized {
        return;
    }
    self.upcoming_normalized = true;
    // The trailing starter (if any) is exempt from normalization, so work
    // on the slice that excludes it.
    let without_trailing_starter = if let Some((last, head)) = self.upcoming.split_last() {
        if !last.decomposition_starts_with_non_starter() {
            if head.is_empty() {
                // There is a single starter, which isn't required
                // to be normalized.
                return;
            } else {
                head
            }
        } else {
            &self.upcoming[..]
        }
    } else {
        // Make the assertion conditional to make CI happy.
        #[cfg(debug_assertions)]
        debug_assert!(self.iter_exhausted);
        return;
    };
    // It would be better to attempt to normalize in place, but let's do at
    // least this.
// If every trie value is zero apart from the marker bits, every character
// is its own (already-normalized) decomposition, so nothing to do.
if without_trailing_starter.iter().all(|c| {
    (c.trie_val
        & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER | HANGUL_SYLLABLE_MARKER))
        == 0
}) {
    return;
}
// Rebuild `upcoming` from scratch: decompose each item and canonically
// sort each run of combining characters by ccc.
let mut unnormalized = core::mem::take(&mut self.upcoming);
let last_index = unnormalized.len() - 1;
// Indexing is for debug assert only.
#[expect(clippy::indexing_slicing)]
{
    debug_assert!(!unnormalized[0].decomposition_starts_with_non_starter());
}
let mut start_combining = 0;
for (i, c) in unnormalized.drain(..).enumerate() {
    if c.decomposition_starts_with_non_starter() {
        self.push_decomposed_combining(c);
    } else if i == last_index {
        // The trailing starter stays unnormalized; sort the combining
        // run accumulated before it and append it as-is.
        // Indices are in range by construction, so indexing is OK.
        #[expect(clippy::indexing_slicing)]
        self.upcoming[start_combining..].sort_by_key(|c| c.ccc());
        self.upcoming.push(c);
        return;
    } else {
        // Indices are in range by construction, so indexing is OK.
        #[expect(clippy::indexing_slicing)]
        self.upcoming[start_combining..].sort_by_key(|c| c.ccc());
        start_combining = self.push_decomposed_starter(c);
    }
}
// Make the assertion conditional to make CI happy.
#[cfg(debug_assertions)]
debug_assert!(self.iter_exhausted);
// Indices are in range by construction, so indexing is OK.
#[expect(clippy::indexing_slicing)]
self.upcoming[start_combining..].sort_by_key(|c| c.ccc());
}

/// Pushes a character whose decomposition starts with a non-starter onto
/// `upcoming`, expanding the handful of special cases that decompose into
/// multiple non-starters.
fn push_decomposed_combining(&mut self, c: CharacterAndClassAndTrieValue) {
    if !trie_value_indicates_special_non_starter_decomposition(c.trie_val) {
        // Common case: the character is its own decomposition and already
        // carries a ccc in its trie value.
        debug_assert!(trie_value_has_ccc(c.trie_val));
        self.upcoming.push(c);
        return;
    }
    // The Tibetan special cases are starters that decompose into non-starters.
// Expand the fixed, known decompositions, assigning the correct
// non-zero canonical combining classes directly.
match c.character() {
    '\u{0340}' => {
        // COMBINING GRAVE TONE MARK
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0300}',
                CanonicalCombiningClass::Above,
            ));
    }
    '\u{0341}' => {
        // COMBINING ACUTE TONE MARK
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0301}',
                CanonicalCombiningClass::Above,
            ));
    }
    '\u{0343}' => {
        // COMBINING GREEK KORONIS
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0313}',
                CanonicalCombiningClass::Above,
            ));
    }
    '\u{0344}' => {
        // COMBINING GREEK DIALYTIKA TONOS
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0308}',
                CanonicalCombiningClass::Above,
            ));
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0301}',
                CanonicalCombiningClass::Above,
            ));
    }
    '\u{0F73}' => {
        // TIBETAN VOWEL SIGN II
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0F71}',
                CanonicalCombiningClass::CCC129,
            ));
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0F72}',
                CanonicalCombiningClass::CCC130,
            ));
    }
    '\u{0F75}' => {
        // TIBETAN VOWEL SIGN UU
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0F71}',
                CanonicalCombiningClass::CCC129,
            ));
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0F74}',
                CanonicalCombiningClass::CCC132,
            ));
    }
    '\u{0F81}' => {
        // TIBETAN VOWEL SIGN REVERSED II
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0F71}',
                CanonicalCombiningClass::CCC129,
            ));
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_zero_ccc(
                '\u{0F80}',
                CanonicalCombiningClass::CCC130,
            ));
    }
    _ => {
        // GIGO case
        debug_assert!(false);
    }
}
}

/// Decomposes a starter and pushes the decomposition onto `upcoming`.
/// Returns the index in `upcoming` at which the trailing combining-character
/// run of the decomposition begins (for later ccc sorting by the caller).
fn push_decomposed_starter(&mut self, c: CharacterAndClassAndTrieValue) -> usize {
    let mut search_start_combining = false;
    let old_len = self.upcoming.len();
    // Not inserting early returns below to keep the same structure
    // as in the ce32 mapping
// code.
// Hangul syllable check omitted, because it's fine not to decompose
// Hangul syllables in lookahead, because Hangul isn't allowed to
// participate in contractions, and the trie default is that a character
// is its own decomposition.
// See components/normalizer/trie-value-format.md
let decomposition = c.trie_val;
if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER))
    <= HANGUL_SYLLABLE_MARKER
{
    // The character is its own decomposition (or Hangul syllable)
    // Set the Canonical Combining Class to zero
    self.upcoming.push(
        CharacterAndClassAndTrieValue::new_with_non_decomposing_starter(c.character()),
    );
} else {
    let high_zeros = (decomposition & HIGH_ZEROS_MASK) == 0;
    let low_zeros = (decomposition & LOW_ZEROS_MASK) == 0;
    if !high_zeros && !low_zeros {
        // Decomposition into two BMP characters: starter and non-starter
        // (packed as two 15-bit fields in the trie value).
        let starter = char_from_u32(decomposition & 0x7FFF);
        let low_c = char_from_u32((decomposition >> 15) & 0x7FFF);
        self.upcoming
            .push(CharacterAndClassAndTrieValue::new_with_non_decomposing_starter(starter));
        let trie_value = self.iter.trie().bmp(low_c as u16);
        self.upcoming.push(
            CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val(
                low_c, trie_value,
            ),
        );
    } else if high_zeros {
        let singleton = decomposition as u16;
        debug_assert_ne!(
            singleton,
            FDFA_MARKER,
            "How come U+FDFA NFKD marker seen in NFD?"
        );
        if (singleton & 0xFF00) == 0xD800 {
            // We're at the end of the stream, so we aren't dealing with the
            // next undecomposed starter but are dealing with an
            // already-decomposed non-starter. Just put it back.
            self.upcoming.push(c);
            // Make the assertion conditional to make CI happy.
            #[cfg(debug_assertions)]
            debug_assert!(self.iter_exhausted);
        } else {
            // Decomposition into one BMP character
            self.upcoming.push(
                CharacterAndClassAndTrieValue::new_with_non_decomposing_starter(
                    char_from_u16(singleton),
                ),
            );
        }
    } else {
        debug_assert!(low_zeros);
        // Complex decomposition: the trie value is an offset+length into
        // the shared `scalars16`/`scalars32` tables.
        // Only 12 of 14 bits used as of Unicode 16.
        let offset = (((decomposition & !(0b11 << 30)) >> 16) as usize) - 1;
        // Only 3 of 4 bits used as of Unicode 16.
        let len_bits = decomposition & 0b1111;
        let only_non_starters_in_trail = (decomposition & 0b10000) != 0;
        if offset < self.scalars16.len() {
            let len = (len_bits + 2) as usize;
            for u in unwrap_or_gigo(
                self.scalars16.get_subslice(offset..offset + len),
                SINGLE_REPLACEMENT_CHARACTER_U16, // single instead of empty for consistency with the other code path
            )
            .iter()
            {
                let ch = char_from_u16(u);
                let trie_value = self.iter.trie().bmp(u);
                self.upcoming
                    .push(CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val(ch, trie_value));
            }
        } else {
            let len = (len_bits + 1) as usize;
            let offset32 = offset - self.scalars16.len();
            for ch in unwrap_or_gigo(
                self.scalars32.get_subslice(offset32..offset32 + len),
                SINGLE_REPLACEMENT_CHARACTER_CHAR, // single instead of empty for consistency with the other code path
            )
            .iter()
            {
                let trie_value = self.iter.trie().scalar(ch);
                self.upcoming
                    .push(CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val(ch, trie_value));
            }
        }
        search_start_combining = !only_non_starters_in_trail;
    }
}
if search_start_combining {
    // The decomposition contains starters. As of Unicode 14,
    // There are two possible patterns:
    // BMP: starter, starter, non-starter
    // Plane 1: starter, starter.
    // However, for forward compatibility, support any combination
    // and search for the last starter.
    let mut i = self.upcoming.len() - 1;
    loop {
        if let Some(ch) = self.upcoming.get(i) {
            if ch.decomposition_starts_with_non_starter() {
                i -= 1;
                continue;
            }
            break;
        }
        // GIGO case
        debug_assert!(false);
        // This will wrap to zero below
        i = usize::MAX;
        break;
    }
    // Combining run starts just past the last starter.
    i + 1
} else {
    // The only starter pushed was the first item of the decomposition.
    old_len + 1
}
}

// Decomposes `c`, pushes it to `self.upcoming` (unless the character is
// a Hangul syllable; Hangul isn't allowed to participate in contractions),
// gathers the following combining characters from `self.iter` and the following starter.
// Sorts the combining characters and leaves the starter at the end // unnormalized. The trailing unnormalized starter doesn't get appended if // `self.iter` is exhausted. fn push_decomposed_and_gather_combining(&mut self, c: CharacterAndClassAndTrieValue) { let start_combining = self.push_decomposed_starter(c); // Not using `while let` to be able to set `iter_exhausted` loop { if let Some(ch) = self.iter_next() { if ch.decomposition_starts_with_non_starter() { self.push_decomposed_combining(ch); } else { // Got a new starter // Indices are in range by construction, so indexing is OK. #[expect(clippy::indexing_slicing)] self.upcoming[start_combining..].sort_by_key(|c| c.ccc()); self.upcoming.push(ch); return; } } else { #[cfg(debug_assertions)] { self.iter_exhausted = true; } // Indices are in range by construction, so indexing is OK. #[expect(clippy::indexing_slicing)] self.upcoming[start_combining..].sort_by_key(|c| c.ccc()); return; } } } // Assumption: `pos` starts from zero and increases one by one. // Indexing is OK, because we check against `len()` and the `pos` // increases one by one by construction. 
#[expect(clippy::indexing_slicing)]
/// Returns the item at `pos` in `upcoming`, reading and decomposing more
/// input on demand when `pos` reaches the (unnormalized) tail. Returns
/// `None` only when the input is exhausted. Requires `upcoming` to have
/// been normalized first (see `ensure_upcoming_normalized`).
fn look_ahead(&mut self, pos: usize) -> Option {
    debug_assert!(self.upcoming_normalized);
    if pos + 1 == self.upcoming.len() {
        // `pos` points at the trailing unnormalized starter: decompose it
        // and gather its combining characters before reading it.
        let c = self.upcoming.remove(pos);
        self.push_decomposed_and_gather_combining(c);
        Some(self.upcoming[pos].clone())
    } else if pos == self.upcoming.len() {
        if let Some(c) = self.iter_next() {
            debug_assert!(
                false,
                "The `upcoming` queue should be empty when iteration `pos` at the end"
            );
            self.push_decomposed_and_gather_combining(c);
            Some(self.upcoming[pos].clone())
        } else {
            #[cfg(debug_assertions)]
            {
                self.iter_exhausted = true;
            }
            None
        }
    } else {
        Some(self.upcoming[pos].clone())
    }
}

/// `true` iff the next character's decomposition begins with a starter
/// (an empty `upcoming` counts as "starter", i.e. end of input).
fn is_next_decomposition_starts_with_starter(&self) -> bool {
    if let Some(c_c_tv) = self.upcoming.first() {
        !c_c_tv.decomposition_starts_with_non_starter()
    } else {
        true
    }
}

/// Inserts `c` at the front of `upcoming` and canonically re-sorts (by ccc)
/// the non-starter run at the front of the buffer.
fn prepend_and_sort_non_starter_prefix_of_suffix(&mut self, c: CharacterAndClassAndTrieValue) {
    // Add one for the insertion afterwards.
    let end = 1 + {
        // Find the end of the leading non-starter run.
        let mut iter = self.upcoming.iter().enumerate();
        loop {
            if let Some((i, ch)) = iter.next() {
                if !ch.decomposition_starts_with_non_starter() {
                    break i;
                }
            } else {
                #[cfg(debug_assertions)]
                {
                    self.iter_exhausted = true;
                }
                break self.upcoming.len();
            }
        }
    };
    // If `c` itself is a starter, it stays pinned at index 0 and only the
    // run after it gets sorted.
    let start = c.decomposition_starts_with_non_starter() as usize;
    self.upcoming.insert(0, c);
    // Indices in range by construction
    #[expect(clippy::indexing_slicing)]
    {
        let slice: &mut [CharacterAndClassAndTrieValue] = &mut self.upcoming[start..end];
        slice.sort_by_key(|cc| cc.ccc());
    };
}

/// Records `c` as the most recently seen character for prefix matching.
fn prefix_push(&mut self, c: char) {
    self.prefix[1] = self.prefix[0];
    self.prefix[0] = c;
}

/// Micro optimization for doing a simpler write when
/// we know the most recent character was a non-starter
/// that is not a kana voicing mark.
fn mark_prefix_unmatchable(&mut self) { self.prefix[0] = '\u{FFFF}'; } pub fn next(&mut self) -> CollationElement { #[cfg(debug_assertions)] debug_assert!(self.initialized); debug_assert!(self.is_next_decomposition_starts_with_starter()); if let Some(&ret) = self.pending.get(self.pending_pos) { self.pending_pos += 1; if self.pending_pos == self.pending.len() { self.pending.clear(); self.pending_pos = 0; } return ret; } debug_assert_eq!(self.pending_pos, 0); if let Some(c_c_tv) = self.next_internal() { let mut c = c_c_tv.character(); let mut ce32; let mut data: &CollationData = self.tailoring; // TODO: Should this be a reusable buffer on the struct instead of // getting re-created on the stack every time? let mut combining_characters: SmallVec< [CharacterAndClass; COMBINING_CHARACTER_BUFFER_SIZE], > = SmallVec::new(); // TODO(#2005): Figure out good length // Betting that fusing the NFD algorithm into this one at the // expense of the repetitiveness below, the common cases become // fast in a way that offsets the lack of the canonical closure. // The wall of code before the "Slow path" is an attempt to // optimize based on that bet. // See components/normalizer/trie-value-format.md let decomposition = c_c_tv.trie_val; if (decomposition & !(BACKWARD_COMBINING_MARKER | NON_ROUND_TRIP_MARKER)) == 0 { // The character is its own decomposition // TODO: This is a bad idea. Make sure the jamo are in the root trie and then // remove this special case. let jamo_index = (c as usize).wrapping_sub(HANGUL_L_BASE as usize); // Attribute belongs on an inner expression, but // https://github.com/rust-lang/rust/issues/15701 #[expect(clippy::indexing_slicing)] if jamo_index >= self.jamo.len() { // Note: It might seem like a good idea to reuse the CE32s // from the identical prefix check here, but the logistics // actually make everything slower. 
ce32 = data.ce32_for_char(c); if ce32 == FALLBACK_CE32 { data = self.root; ce32 = data.ce32_for_char(c); } } else { // The purpose of reading the CE32 from the jamo table instead // of the trie even in this case is to make it unnecessary // for all search collation tries to carry a copy of the Hangul // part of the search root. Instead, all non-Korean tailorings // can use a shared copy of the non-Korean search jamo table. // // TODO(#1941): This isn't actually true with the current jamo // search expansions! // TODO(#1941): Instead of having different jamo CE32 table for // "search" collations, we could instead decompose the archaic // jamo to the modern approximation sequences here and then map // those by looking up the modern jamo from the normal root. // We need to set data to root, because archaic jamo refer to // the root. data = self.root; // Index in range by construction above. Not using `get` with // `if let` in order to put the likely branch first. ce32 = CollationElement32::new_from_ule(self.jamo[jamo_index]); } if self.is_next_decomposition_starts_with_starter() { if let Some(ce) = ce32.to_ce_simple_or_long_primary() { self.prefix_push(c); return ce; } else if ce32.tag() == Tag::Contraction && ce32.every_suffix_starts_with_combining() { // Avoid falling onto the slow path e.g. that letters that // may contract with a diacritic when we know that it won't // contract with the next character. let default = data.get_default(ce32.index()); if let Some(ce) = default.to_ce_simple_or_long_primary() { self.prefix_push(c); return ce; } } // TODO(2003): Figure out if it would be an optimization to // handle `Implicit` and `Offset` tags here. 
} } else { let high_zeros = (decomposition & HIGH_ZEROS_MASK) == 0; let low_zeros = (decomposition & LOW_ZEROS_MASK) == 0; if !high_zeros && !low_zeros { // Decomposition into two BMP characters: starter and non-starter c = char_from_u32(decomposition & 0x7FFF); ce32 = data.ce32_for_char(c); if ce32 == FALLBACK_CE32 { data = self.root; ce32 = data.ce32_for_char(c); } let combining = char_from_u32((decomposition >> 15) & 0x7FFF); if self.is_next_decomposition_starts_with_starter() { let diacritic_index = (combining as usize).wrapping_sub(COMBINING_DIACRITICS_BASE); if let Some(secondary) = self.diacritics.get(diacritic_index) { debug_assert_ne!(combining, '\u{0344}', "Should never have COMBINING GREEK DIALYTIKA TONOS here, since it should have decomposed further."); if let Some(ce) = ce32.to_ce_simple_or_long_primary() { let ce_for_combining = CollationElement::new_from_secondary(secondary); self.pending.push(ce_for_combining); self.mark_prefix_unmatchable(); return ce; } if ce32.tag() == Tag::Contraction && ce32.every_suffix_starts_with_combining() { let (default, mut trie) = data.get_default_and_trie(ce32.index()); match trie.next(combining) { TrieResult::NoMatch | TrieResult::NoValue => { if let Some(ce) = default.to_ce_simple_or_long_primary() { let ce_for_combining = CollationElement::new_from_secondary(secondary); self.pending.push(ce_for_combining); self.mark_prefix_unmatchable(); return ce; } } TrieResult::Intermediate(trie_ce32) => { if !ce32.at_least_one_suffix_contains_starter() { if let Some(ce) = CollationElement32::new(trie_ce32 as u32) .to_ce_simple_or_long_primary() { self.mark_prefix_unmatchable(); return ce; } } } TrieResult::FinalValue(trie_ce32) => { if let Some(ce) = CollationElement32::new(trie_ce32 as u32) .to_ce_simple_or_long_primary() { self.mark_prefix_unmatchable(); return ce; } } } } } } combining_characters.push(CharacterAndClass::new_with_placeholder(combining)); } else if high_zeros { // Do the Hangul check on the character instead of 
trusting // the trie value in order not to let GIGO cause unsafety. let hangul_offset = u32::from(c).wrapping_sub(HANGUL_S_BASE); // SIndex in the spec if hangul_offset < HANGUL_S_COUNT { // Hangul syllable // The math here comes from page 144 of Unicode 14.0 let l = hangul_offset / HANGUL_N_COUNT; let v = (hangul_offset % HANGUL_N_COUNT) / HANGUL_T_COUNT; let t = hangul_offset % HANGUL_T_COUNT; // No prefix matches on Hangul self.mark_prefix_unmatchable(); // Indexing OK, because indices in range by construction #[expect(clippy::indexing_slicing)] if self.is_next_decomposition_starts_with_starter() { // TODO(#1941): Assuming self-contained CE32s is OK for the root, // but not currently OK for search collation, which at this time // do not support tailored Hangul. self.pending.push( CollationElement32::new_from_ule( self.jamo[(HANGUL_V_BASE - HANGUL_L_BASE + v) as usize], ) .to_ce_self_contained_or_gigo(), ); if t != 0 { self.pending.push( CollationElement32::new_from_ule( self.jamo[(HANGUL_T_BASE - HANGUL_L_BASE + t) as usize], ) .to_ce_self_contained_or_gigo(), ); } return CollationElement32::new_from_ule(self.jamo[l as usize]) .to_ce_self_contained_or_gigo(); } // Uphold the invariant that the upcoming character is a starter (or end of stream) // at the start of the next `next()` call. We uphold this invariant by leaving the // last jamo unmapped to `CollationElement` in `pending` and instead prepend it to // `upcoming`. 
// // Indexing OK, because indices in range by construction #[expect(clippy::indexing_slicing)] if t != 0 { self.pending.push( CollationElement32::new_from_ule( self.jamo[(HANGUL_V_BASE - HANGUL_L_BASE + v) as usize], ) .to_ce_self_contained_or_gigo(), ); self.upcoming.insert( 0, // Safety: HANGUL_T_BASE is 0x11A7, t is < HANGUL_T_COUNT = 28, so this is definitely // in range for a char (≤ 0xD800) CharacterAndClassAndTrieValue::new_with_non_decomposing_starter( unsafe { core::char::from_u32_unchecked(HANGUL_T_BASE + t) }, ), ); } else { self.upcoming.insert( 0, // Safety: HANGUL_V_BASE is 0x1161, v is < HANGUL_N_COUNT = 588, so this is definitely // in range for a char (≤ 0xD800) CharacterAndClassAndTrieValue::new_with_non_decomposing_starter( unsafe { core::char::from_u32_unchecked(HANGUL_V_BASE + v) }, ), ); } // Indexing OK, because indices in range by construction #[expect(clippy::indexing_slicing)] return CollationElement32::new_from_ule(self.jamo[l as usize]) .to_ce_self_contained_or_gigo(); } let singleton = decomposition as u16; debug_assert_ne!( singleton, FDFA_MARKER, "How come U+FDFA NFKD marker seen in NFD?" ); // Decomposition into one BMP character c = char_from_u16(singleton); ce32 = data.ce32_for_char(c); if ce32 == FALLBACK_CE32 { data = self.root; ce32 = data.ce32_for_char(c); } if self.is_next_decomposition_starts_with_starter() { if let Some(ce) = ce32.to_ce_simple_or_long_primary() { self.prefix_push(c); return ce; } } } else { debug_assert!(low_zeros); // Only 12 of 14 bits used as of Unicode 16. let offset = (((decomposition & !(0b11 << 30)) >> 16) as usize) - 1; // Only 3 of 4 bits used as of Unicode 16. 
let len_bits = decomposition & 0b1111; let only_non_starters_in_trail = (decomposition & 0b10000) != 0; if offset < self.scalars16.len() { let len = (len_bits + 2) as usize; let (starter, tail) = self .scalars16 .get_subslice(offset..offset + len) .and_then(ZeroSlice::split_first) .map_or_else( || { // GIGO case debug_assert!(false); (REPLACEMENT_CHARACTER, EMPTY_U16) }, |(first, tail)| (char_from_u16(first), tail), ); c = starter; if only_non_starters_in_trail { for u in tail.iter() { let char_from_u = char_from_u16(u); let trie_value = self.iter.trie().bmp(u); let ccc = ccc_from_trie_value(trie_value); combining_characters.push(CharacterAndClass::new(char_from_u, ccc)); } } else { let mut it = tail.iter(); while let Some(u) = it.next() { let ch = char_from_u16(u); let ccc = ccc_from_trie_value(self.iter.trie().bmp(u)); if ccc != CanonicalCombiningClass::NotReordered { // As of Unicode 14, this branch is never taken. // It exist for forward compatibility. combining_characters.push(CharacterAndClass::new(ch, ccc)); continue; } // At this point, we might have a single newly-read // combining character in self.upcoming. In that case, we // need to buffer up the upcoming combining characters, too, // in order to make `prepend_and_sort_non_starter_prefix_of_suffix` // sort the right characters. 
self.maybe_gather_combining(); while let Some(u) = it.next_back() { let tail_char = char_from_u16(u); let trie_value = self.iter.trie().bmp(u); self.prepend_and_sort_non_starter_prefix_of_suffix(CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val(tail_char, trie_value)); } self.prepend_and_sort_non_starter_prefix_of_suffix( CharacterAndClassAndTrieValue::new_with_non_decomposing_starter( ch, ), ); break; } } } else { let len = (len_bits + 1) as usize; let offset32 = offset - self.scalars16.len(); let (starter, tail) = self .scalars32 .get_subslice(offset32..offset32 + len) .and_then(|slice| slice.split_first()) .unwrap_or_else(|| { // GIGO case debug_assert!(false); (REPLACEMENT_CHARACTER, EMPTY_CHAR) }); c = starter; if only_non_starters_in_trail { for ch in tail.iter() { let trie_value = self.iter.trie().scalar(ch); let ccc = ccc_from_trie_value(trie_value); combining_characters.push(CharacterAndClass::new(ch, ccc)); } } else { let mut it = tail.iter(); while let Some(ch) = it.next() { let ccc = ccc_from_trie_value(self.iter.trie().scalar(ch)); if ccc != CanonicalCombiningClass::NotReordered { // As of Unicode 14, this branch is never taken. // It exist for forward compatibility. combining_characters.push(CharacterAndClass::new(ch, ccc)); continue; } // At this point, we might have a single newly-read // combining character in self.upcoming. In that case, we // need to buffer up the upcoming combining characters, too, // in order to make `prepend_and_sort_non_starter_prefix_of_suffix` // sort the right characters. 
self.maybe_gather_combining(); while let Some(tail_char) = it.next_back() { let trie_value = self.iter.trie().scalar(tail_char); self.prepend_and_sort_non_starter_prefix_of_suffix(CharacterAndClassAndTrieValue::new_with_non_special_decomposition_trie_val(tail_char, trie_value)); } self.prepend_and_sort_non_starter_prefix_of_suffix( CharacterAndClassAndTrieValue::new_with_non_decomposing_starter( ch, ), ); break; } } } ce32 = data.ce32_for_char(c); if ce32 == FALLBACK_CE32 { data = self.root; ce32 = data.ce32_for_char(c); } } } let mut may_have_contracted_starter = false; // Slow path self.collect_combining(&mut combining_characters); // Now: // c is the starter character // ce32 is the CollationElement32 for the starter // combining_characters contains all the combining characters before // the next starter sorted by combining class. let mut looked_ahead = 0; let mut drain_from_upcoming = 0; 'outer: loop { 'ce32loop: loop { // TODO(#2002): Ensure that the CE32 flavors in this loop are checked in the optimal // order given their frequency in real workloads. 
if let Some(ce) = ce32.to_ce_self_contained() { self.pending.push(ce); break 'ce32loop; } else { match ce32.tag() { Tag::Expansion32 => { let ce32s = data.get_ce32s(ce32.index(), ce32.len()); for u in ce32s.iter() { self.pending.push( CollationElement32::new(u).to_ce_self_contained_or_gigo(), ); } break 'ce32loop; } Tag::Expansion => { let ces = data.get_ces(ce32.index(), ce32.len()); for u in ces.iter() { self.pending.push(CollationElement::new(u)); } break 'ce32loop; } Tag::Prefix => { let (default, mut trie) = data.get_default_and_trie(ce32.index()); ce32 = default; for &ch in self.prefix.iter() { match trie.next(ch) { TrieResult::NoValue => {} TrieResult::NoMatch => { continue 'ce32loop; } TrieResult::Intermediate(ce32_i) => { ce32 = CollationElement32::new(ce32_i as u32); } TrieResult::FinalValue(ce32_i) => { ce32 = CollationElement32::new(ce32_i as u32); continue 'ce32loop; } } } continue 'ce32loop; } Tag::Contraction => { let every_suffix_starts_with_combining = ce32.every_suffix_starts_with_combining(); let at_least_one_suffix_contains_starter = ce32.at_least_one_suffix_contains_starter(); let at_least_one_suffix_ends_with_non_starter = ce32.at_least_one_suffix_ends_with_non_starter(); let (default, mut trie) = data.get_default_and_trie(ce32.index()); ce32 = default; if every_suffix_starts_with_combining && combining_characters.is_empty() { continue 'ce32loop; } let mut longest_matching_state = trie.clone(); let mut longest_matching_index = 0; let mut attempt = 0; let mut i = 0; let mut most_recent_skipped_ccc = CanonicalCombiningClass::NotReordered; // TODO(#2001): Pending removals will in practice be small numbers. // What if we made the item smaller than usize? 
let mut pending_removals: SmallVec<[usize; PENDING_REMOVALS_SIZE]> = SmallVec::new(); while let Some((character, ccc)) = combining_characters.get(i).map(|c| c.character_and_ccc()) { match (most_recent_skipped_ccc < ccc, trie.next(character)) { (true, TrieResult::Intermediate(ce32_i)) => { let _ = combining_characters.remove(i); while let Some(idx) = pending_removals.pop() { combining_characters.remove(idx); i -= 1; // Adjust for the shortening } attempt = 0; longest_matching_index = i; longest_matching_state = trie.clone(); ce32 = CollationElement32::new(ce32_i as u32); } (true, TrieResult::FinalValue(ce32_i)) => { let _ = combining_characters.remove(i); while let Some(idx) = pending_removals.pop() { combining_characters.remove(idx); } ce32 = CollationElement32::new(ce32_i as u32); continue 'ce32loop; } (_, TrieResult::NoValue) => { pending_removals.push(i); i += 1; } _ => { pending_removals.clear(); most_recent_skipped_ccc = ccc; attempt += 1; i = longest_matching_index + attempt; trie = longest_matching_state.clone(); } } } if !(at_least_one_suffix_contains_starter && combining_characters.is_empty()) { continue 'ce32loop; } // Let's just set this flag here instead of trying to make // it more granular and, therefore, more error-prone. // After all, this flag is just about optimizing away one // `CodePointInversionList` check in the common case. may_have_contracted_starter = true; debug_assert!(pending_removals.is_empty()); self.ensure_upcoming_normalized(); loop { let ahead = self.look_ahead(looked_ahead); looked_ahead += 1; if let Some(ch) = ahead { match trie.next(ch.character()) { TrieResult::NoValue => {} TrieResult::NoMatch => { if !at_least_one_suffix_ends_with_non_starter { continue 'ce32loop; } if !ch.decomposition_starts_with_non_starter() { continue 'ce32loop; } // The last-checked character is non-starter // and at least one contraction suffix ends // with a non-starter. Try a discontiguous // match. 
trie = longest_matching_state.clone(); // For clarity, mint a new set of variables that // behave consistently with the // `combining_characters` case let mut longest_matching_index = 0; let mut attempt = 0; let mut i = 0; most_recent_skipped_ccc = ch.ccc(); self.ensure_upcoming_normalized(); loop { let ahead = self.look_ahead(looked_ahead + i); if let Some(ch) = ahead { let ccc = ch.ccc(); if ccc == CanonicalCombiningClass::NotReordered { // If we came here, we had an intervening non-matching // non-starter, after which we cannot contract another // starter anymore. continue 'ce32loop; } match ( most_recent_skipped_ccc < ccc, trie.next(ch.character()), ) { ( true, TrieResult::Intermediate(ce32_i), ) => { let _ = self .upcoming .remove(looked_ahead + i); while let Some(idx) = pending_removals.pop() { self.upcoming .remove(looked_ahead + idx); i -= 1; // Adjust for the shortening } attempt = 0; longest_matching_index = i; longest_matching_state = trie.clone(); ce32 = CollationElement32::new( ce32_i as u32, ); } ( true, TrieResult::FinalValue(ce32_i), ) => { let _ = self .upcoming .remove(looked_ahead + i); while let Some(idx) = pending_removals.pop() { self.upcoming .remove(looked_ahead + idx); } ce32 = CollationElement32::new( ce32_i as u32, ); continue 'ce32loop; } (_, TrieResult::NoValue) => { pending_removals.push(i); i += 1; } _ => { pending_removals.clear(); most_recent_skipped_ccc = ccc; attempt += 1; i = longest_matching_index + attempt; trie = longest_matching_state.clone(); } } } else { continue 'ce32loop; } } } TrieResult::Intermediate(ce32_i) => { longest_matching_state = trie.clone(); drain_from_upcoming = looked_ahead; ce32 = CollationElement32::new(ce32_i as u32); } TrieResult::FinalValue(ce32_i) => { drain_from_upcoming = looked_ahead; ce32 = CollationElement32::new(ce32_i as u32); continue 'ce32loop; } } } else { continue 'ce32loop; } } // Unreachable } Tag::Digit => { if let Some(high_bits) = self.numeric_primary { let mut digits: SmallVec<[u8; 
DIGIT_BUFFER_SIZE]> = SmallVec::new(); // TODO(#2005): Figure out good length digits.push(ce32.digit()); let numeric_primary = u32::from(high_bits) << 24; if combining_characters.is_empty() { // Numeric collation doesn't work with combining // characters applied to the digits. // It's unclear if reading from the tailoring first // is needed for practical purposes, since it doesn't // make much sense to tailor the numeric value of digits. // Performing the usual fallback pattern anyway just in // case. may_have_contracted_starter = true; self.ensure_upcoming_normalized(); while let Some(upcoming) = self.look_ahead(looked_ahead) { looked_ahead += 1; ce32 = self.tailoring.ce32_for_char(upcoming.character()); if ce32 == FALLBACK_CE32 { ce32 = self.root.ce32_for_char(upcoming.character()); } if ce32.tag_checked() != Some(Tag::Digit) { break; } drain_from_upcoming = looked_ahead; digits.push(ce32.digit()); } } // Skip leading zeros let mut zeros = 0; while let Some(&digit) = digits.get(zeros) { if digit != 0 { break; } zeros += 1; } if zeros == digits.len() { // All zeros, keep a zero zeros = digits.len() - 1; } // Index in range by construction above #[expect(clippy::indexing_slicing)] let mut remaining = &digits[zeros..]; while !remaining.is_empty() { // Numeric CEs are generated for segments of // up to 254 digits. let (head, tail) = remaining .split_at_checked(254) .unwrap_or((remaining, b"")); remaining = tail; // From ICU4C CollationIterator::appendNumericSegmentCEs if head.len() <= 7 { let mut digit_iter = head.iter(); // `unwrap` succeeds, because we always have at least one // digit to even start numeric processing. #[expect(clippy::unwrap_used)] let mut value = u32::from(*digit_iter.next().unwrap()); for &digit in digit_iter { value *= 10; value += u32::from(digit); } // Primary weight second byte values: // 74 byte values 2.. 75 for small numbers in two-byte primary weights. // 40 byte values 76..115 for medium numbers in three-byte primary weights. 
// 16 byte values 116..131 for large numbers in four-byte primary weights. // 124 byte values 132..255 for very large numbers with 4..127 digit pairs. let mut first_byte = 2u32; let mut num_bytes = 74u32; if value < num_bytes { self.pending.push( CollationElement::new_from_primary( numeric_primary | ((first_byte + value) << 16), ), ); continue; } value -= num_bytes; first_byte += num_bytes; num_bytes = 40; if value < num_bytes * 254 { // Three-byte primary for 74..10233=74+40*254-1, good for year numbers and more. self.pending.push( CollationElement::new_from_primary( numeric_primary | ((first_byte + value / 254) << 16) | ((2 + value % 254) << 8), ), ); continue; } value -= num_bytes * 254; first_byte += num_bytes; num_bytes = 16; if value < num_bytes * 254 * 254 { // Four-byte primary for 10234..1042489=10234+16*254*254-1. let mut primary = numeric_primary | (2 + value % 254); value /= 254; primary |= (2 + value % 254) << 8; value /= 254; primary |= (first_byte + value % 254) << 16; self.pending.push( CollationElement::new_from_primary(primary), ); continue; } // original value > 1042489 } debug_assert!(head.len() >= 7); // The second primary byte value 132..255 indicates the number of digit pairs (4..127), // then we generate primary bytes with those pairs. // Omit trailing 00 pairs. // Decrement the value for the last pair. // Set the exponent. 4 pairs->132, 5 pairs->133, ..., 127 pairs->255. let mut len = head.len(); let num_pairs = (len as u32).div_ceil(2); // as u32 OK, because capped to 254 let mut primary = numeric_primary | ((132 - 4 + num_pairs) << 16); // Find the length without trailing 00 pairs. // // The indexing below is within bounds due to the following: // // * We skipped leading zeros. // * If `len == 2`: The loop condition is false, because // `head[len - 2]` isn't a leading zero. 
// * If `len == 1`: The loop condition is false, because // `head[len - 1]` isn't a leading zero, and `&&` // short-circuits, so the `head[len - 2]` access doesn't // occur. #[expect(clippy::indexing_slicing)] while head[len - 1] == 0 && head[len - 2] == 0 { len -= 2; } // Read the first pair // Index in bounds by construction above. #[expect(clippy::indexing_slicing)] let mut digit_iter = head[..len].iter(); // `unwrap` succeeds by construction #[expect(clippy::unwrap_used)] let mut pair = if len & 1 == 1 { // Only "half a pair" if we have an odd number of digits. u32::from(*digit_iter.next().unwrap()) } else { u32::from(*digit_iter.next().unwrap()) * 10 + u32::from(*digit_iter.next().unwrap()) }; pair = 11 + 2 * pair; let mut shift = 8u32; while let (Some(&left), Some(&right)) = (digit_iter.next(), digit_iter.next()) { if shift == 0 { primary |= pair; self.pending.push( CollationElement::new_from_primary(primary), ); primary = numeric_primary; shift = 16; } else { primary |= pair << shift; shift -= 8; } pair = 11 + 2 * (u32::from(left) * 10 + u32::from(right)); } primary |= (pair - 1) << shift; self.pending .push(CollationElement::new_from_primary(primary)); } break 'ce32loop; } ce32 = data.get_ce32(ce32.index()); continue 'ce32loop; } Tag::Offset => { self.pending.push(data.ce_from_offset_ce32(c, ce32)); break 'ce32loop; } Tag::Implicit => { self.pending .push(CollationElement::new_implicit_from_char(c)); break 'ce32loop; } Tag::Fallback | Tag::Reserved3 | Tag::LongPrimary | Tag::LongSecondary | Tag::BuilderData | Tag::LeadSurrogate | Tag::LatinExpansion | Tag::U0000 | Tag::Hangul => { debug_assert!(false); // GIGO case self.pending.push(FFFD_CE); break 'ce32loop; } } } } self.prefix_push(c); 'combining_outer: loop { debug_assert!(drain_from_upcoming == 0 || combining_characters.is_empty()); let mut i = 0; 'combining: while let Some(ch) = combining_characters.get(i).map(|c| c.character()) { c = ch; let diacritic_index = (c as 
usize).wrapping_sub(COMBINING_DIACRITICS_BASE); if let Some(secondary) = self.diacritics.get(diacritic_index) { // TODO(#2006): unlikely annotation if c == '\u{0307}' && self.lithuanian_dot_above { if let Some(next_c) = combining_characters.get(i + 1).map(|c| c.character()) { if next_c == '\u{0300}' || next_c == '\u{0301}' || next_c == '\u{0303}' { // Lithuanian contracts COMBINING DOT ABOVE with three other diacritics of the // same combining class such that the COMBINING DOT ABOVE is ignored for // collation. Since the combining class is the same, it's valid to simply // look at the next character in `combining_characters`. i += 1; continue 'combining; } } } self.pending .push(CollationElement::new_from_secondary(secondary)); self.mark_prefix_unmatchable(); i += 1; continue 'combining; } // `c` is not a table-optimized diacritic. // Not bothering to micro optimize away the move of the remaining // part of `combining_characters`. let _ = combining_characters.drain(..=i); data = self.tailoring; ce32 = data.ce32_for_char(c); if ce32 == FALLBACK_CE32 { data = self.root; ce32 = data.ce32_for_char(c); } continue 'outer; } // Note: The borrow checker didn't like the iterator formulation // for the loop below, because the `Drain` would have kept `self` // mutable borrowed when trying to call `prefix_push`. To change // this, `prefix` and `prefix_push` would need to be refactored // into a struct. i = 0; while i < drain_from_upcoming { // By construction, `drain_from_upcoming` doesn't exceed `upcoming.len()` #[expect(clippy::indexing_slicing)] let ch = self.upcoming[i].character(); self.prefix_push(ch); i += 1; } // TODO(#2004): The above makes prefix out of sync when starter-contracting // contractions use `pending_removals` instead of `drain_from_upcoming`. // Do there exist prefixes that overlap contraction suffixes? // At least as of CLDR 40, the two possible non-starters in prefixes, // kana voicing marks, shouldn't be participating in Brahmic contractions. 
                // Discard the upcoming characters that were consumed by a
                // starter-contracting contraction (already pushed to `prefix`
                // above).
                let _ = self.upcoming.drain(..drain_from_upcoming);
                if self.upcoming.is_empty() {
                    // Make the assertion conditional to make CI happy.
                    #[cfg(debug_assertions)]
                    debug_assert!(self.iter_exhausted || may_have_contracted_starter);
                    // Refill the lookahead buffer with the next character, if any,
                    // to maintain the `upcoming` invariant for the next call.
                    if let Some(c_c_tv) = self.iter_next() {
                        self.upcoming.push(c_c_tv);
                    } else {
                        #[cfg(debug_assertions)]
                        {
                            self.iter_exhausted = true;
                        }
                    }
                }
                if may_have_contracted_starter {
                    may_have_contracted_starter = false;
                    if !self.is_next_decomposition_starts_with_starter() {
                        // We need to loop back and process another round of
                        // non-starters in order to maintain the invariant of
                        // `upcoming` on the next call to `next()`.
                        drain_from_upcoming = 0;
                        self.collect_combining(&mut combining_characters);
                        continue 'combining_outer;
                    }
                }
                // By construction, we have at least one pending CE by now.
                #[expect(clippy::indexing_slicing)]
                let ret = self.pending[0];
                debug_assert_eq!(self.pending_pos, 0);
                // Keep `pending`/`pending_pos` consistent: clear when this was
                // the only pending CE, otherwise advance past the one returned.
                if self.pending.len() == 1 {
                    self.pending.clear();
                } else {
                    self.pending_pos = 1;
                }
                return ret;
            } // 'combining_outer
        } // 'outer
        } else {
            // No upcoming character: signal end of input with the sentinel
            // (this iterator signals end by returning `NO_CE`, not `None`).
            NO_CE
        }
    }

    /// Collects the contiguous run of upcoming characters whose
    /// decomposition starts with a non-starter into
    /// `combining_characters`.
    ///
    /// Characters in the range U+0340..=U+0F81 that are starters
    /// decomposing into non-starters (the combining tone marks,
    /// COMBINING GREEK KORONIS / DIALYTIKA TONOS, and the Tibetan
    /// vowel signs) are replaced by their decompositions, with the
    /// canonical combining class of each piece set eagerly.
    ///
    /// If more than one character was collected, the combining
    /// classes are filled in from the trie (where not already set)
    /// and the buffer is sorted by combining class, per canonical
    /// ordering.
    #[inline(always)]
    fn collect_combining(
        &mut self,
        combining_characters: &mut SmallVec<[CharacterAndClass; COMBINING_CHARACTER_BUFFER_SIZE]>,
    ) {
        while !self.is_next_decomposition_starts_with_starter() {
            // `unwrap` is OK, because `!self.is_next_decomposition_starts_with_starter()`
            // means the `unwrap()` must succeed.
            #[expect(clippy::unwrap_used)]
            let combining = self.next_internal().unwrap().c_and_c;
            let combining_c = combining.character();
            // Fast path: outside U+0340..=U+0F81 there are no special cases,
            // so the character is pushed as-is.
            if !in_inclusive_range(combining_c, '\u{0340}', '\u{0F81}') {
                combining_characters.push(combining);
            } else {
                // The Tibetan special cases are starters that decompose into non-starters.
                match combining_c {
                    '\u{0340}' => {
                        // COMBINING GRAVE TONE MARK
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0300}',
                            CanonicalCombiningClass::Above,
                        ));
                    }
                    '\u{0341}' => {
                        // COMBINING ACUTE TONE MARK
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0301}',
                            CanonicalCombiningClass::Above,
                        ));
                    }
                    '\u{0343}' => {
                        // COMBINING GREEK KORONIS
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0313}',
                            CanonicalCombiningClass::Above,
                        ));
                    }
                    '\u{0344}' => {
                        // COMBINING GREEK DIALYTIKA TONOS
                        // Decomposes into two characters; both are pushed.
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0308}',
                            CanonicalCombiningClass::Above,
                        ));
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0301}',
                            CanonicalCombiningClass::Above,
                        ));
                    }
                    '\u{0F73}' => {
                        // TIBETAN VOWEL SIGN II
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0F71}',
                            CanonicalCombiningClass::CCC129,
                        ));
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0F72}',
                            CanonicalCombiningClass::CCC130,
                        ));
                    }
                    '\u{0F75}' => {
                        // TIBETAN VOWEL SIGN UU
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0F71}',
                            CanonicalCombiningClass::CCC129,
                        ));
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0F74}',
                            CanonicalCombiningClass::CCC132,
                        ));
                    }
                    '\u{0F81}' => {
                        // TIBETAN VOWEL SIGN REVERSED II
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0F71}',
                            CanonicalCombiningClass::CCC129,
                        ));
                        combining_characters.push(CharacterAndClass::new(
                            '\u{0F80}',
                            CanonicalCombiningClass::CCC130,
                        ));
                    }
                    _ => {
                        // In range but not a special case: push as-is.
                        combining_characters.push(combining);
                    }
                };
            }
        }
        if combining_characters.len() > 1 {
            // This optimizes away the class lookup when len() == 1.
            // Unclear if this micro optimization is worthwhile.
            // In any case, we store the CanonicalCombiningClass in order to
            // avoid having to look it up again when deciding whether to proceed
            // with a discontiguous match. As a side effect, it also means that
            // duplicate lookups aren't needed if the sort below happens to compare
            // an item more than once.
            combining_characters
                .iter_mut()
                .for_each(|cc| cc.set_ccc_from_trie_if_not_already_set(self.iter.trie()));
            // Canonical ordering: stable sort by combining class so that
            // equal-CCC characters keep their relative order.
            combining_characters.sort_by_key(|cc| cc.ccc());
        }
    }
}