// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! This module defines all available properties.
//!
//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
//! and implement [`EnumeratedProperty`].
//!
//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
//!
//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
//! can be constructed.
//!
//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)

pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};

pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};

/// Wraps an `impl` block of associated constants for an open-enum newtype and
/// generates the boilerplate that depends on the complete list of constants.
///
/// See [`test_enumerated_property_completeness`] for usage.
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
/// The `impl` block is re-emitted unchanged, and the macro additionally produces:
///
/// - `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`
///   listing every constant in declaration order;
/// - behind the `datagen` feature, a `databake::Bake` impl that bakes a value
///   equal to a named constant as that constant, and any other value as a
///   `from_icu4c_value(..)` call;
/// - `impl From<EastAsianWidth> for u16`, which casts the first field of the
///   struct to `u16`.
macro_rules! create_const_array {
    (
        $ ( #[$meta:meta] )*
        impl $enum_ty:ident {
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
        }
    ) => {
        // Re-emit the caller's `impl` block, attributes included, and append `ALL_VALUES`.
        $( #[$meta] )*
        impl $enum_ty {
            $(
                $(#[$const_meta])*
                $v const $i: $t = $e;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $($enum_ty::$i),*
            ];
        }

        #[cfg(feature = "datagen")]
        impl databake::Bake for $enum_ty {
            fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
                env.insert("icu_properties");
                match *self {
                    // Values that have a named constant are baked as a path to that constant.
                    $(
                        Self::$i => databake::quote!(icu_properties::props::$enum_ty::$i),
                    )*
                    // Open enum: any remaining raw value is baked through the constructor.
                    Self(v) => databake::quote!(icu_properties::props::$enum_ty::from_icu4c_value(#v)),
                }
            }
        }

        impl From<$enum_ty> for u16 {
            fn from(other: $enum_ty) -> Self {
                other.0 as u16
            }
        }
    }
}

pub use crate::code_point_map::EnumeratedProperty;

/// Implements [`EnumeratedProperty`] (and the crate-private `Sealed` marker)
/// for a property value type.
///
/// Arguments, in order:
///
/// - `name` / `short_name`: string literals, stored as bytes in `NAME` and
///   `SHORT_NAME`;
/// - `ident`: the value type the traits are implemented for;
/// - `data_marker`: the type used as `EnumeratedProperty::DataMarker`;
/// - `singleton`: the item on `crate::provider::Baked` used as `SINGLETON`
///   when the `compiled_data` feature is enabled;
/// - `ule_ty` (optional): when given, `zerovec::ule::AsULE` is also implemented
///   by delegating to the first field of the value type.
macro_rules! make_enumerated_property {
    (
        name: $name:literal;
        short_name: $short_name:literal;
        ident: $value_ty:path;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        $(ule_ty: $ule_ty:ty;)?
    ) => {
        impl crate::private::Sealed for $value_ty {}

        impl EnumeratedProperty for $value_ty {
            type DataMarker = $data_marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$singleton;
            const NAME: &'static [u8] = $name.as_bytes();
            const SHORT_NAME: &'static [u8] = $short_name.as_bytes();
        }

        // Only emitted when `ule_ty` is supplied.
        $(
            impl zerovec::ule::AsULE for $value_ty {
                type ULE = $ule_ty;

                fn to_unaligned(self) -> Self::ULE {
                    self.0.to_unaligned()
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}

/// Enumerated property Bidi_Class
///
/// These are the categories required by the Unicode Bidirectional Algorithm.
/// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values).
/// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
/// /// # Example /// /// ``` /// use icu::properties::{props::BidiClass, CodePointMapData}; /// /// assert_eq!( /// CodePointMapData::::new().get('y'), /// BidiClass::LeftToRight /// ); // U+0079 /// assert_eq!( /// CodePointMapData::::new().get('ع'), /// BidiClass::ArabicLetter /// ); // U+0639 /// ``` #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[allow(clippy::exhaustive_structs)] // newtype #[repr(transparent)] pub struct BidiClass(pub(crate) u8); impl BidiClass { /// Returns an ICU4C `UBidiClass` value. pub const fn to_icu4c_value(self) -> u8 { self.0 } /// Constructor from an ICU4C `UBidiClass` value. pub const fn from_icu4c_value(value: u8) -> Self { Self(value) } } create_const_array! { #[allow(non_upper_case_globals)] impl BidiClass { /// (`L`) any strong left-to-right character pub const LeftToRight: BidiClass = BidiClass(0); /// (`R`) any strong right-to-left (non-Arabic-type) character pub const RightToLeft: BidiClass = BidiClass(1); /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit pub const EuropeanNumber: BidiClass = BidiClass(2); /// (`ES`) plus and minus signs pub const EuropeanSeparator: BidiClass = BidiClass(3); /// (`ET`) a terminator in a numeric format context, includes currency signs pub const EuropeanTerminator: BidiClass = BidiClass(4); /// (`AN`) any Arabic-Indic digit pub const ArabicNumber: BidiClass = BidiClass(5); /// (`CS`) commas, colons, and slashes pub const CommonSeparator: BidiClass = BidiClass(6); /// (`B`) various newline characters pub const ParagraphSeparator: BidiClass = BidiClass(7); /// (`S`) various segment-related control codes pub const SegmentSeparator: BidiClass = BidiClass(8); /// (`WS`) spaces pub const WhiteSpace: BidiClass = BidiClass(9); /// (`ON`) most other symbols and punctuation marks pub const OtherNeutral: BidiClass = BidiClass(10); /// (`LRE`) U+202A: the LR embedding control pub const 
LeftToRightEmbedding: BidiClass = BidiClass(11); /// (`LRO`) U+202D: the LR override control pub const LeftToRightOverride: BidiClass = BidiClass(12); /// (`AL`) any strong right-to-left (Arabic-type) character pub const ArabicLetter: BidiClass = BidiClass(13); /// (`RLE`) U+202B: the RL embedding control pub const RightToLeftEmbedding: BidiClass = BidiClass(14); /// (`RLO`) U+202E: the RL override control pub const RightToLeftOverride: BidiClass = BidiClass(15); /// (`PDF`) U+202C: terminates an embedding or override control pub const PopDirectionalFormat: BidiClass = BidiClass(16); /// (`NSM`) any nonspacing mark pub const NonspacingMark: BidiClass = BidiClass(17); /// (`BN`) most format characters, control codes, or noncharacters pub const BoundaryNeutral: BidiClass = BidiClass(18); /// (`FSI`) U+2068: the first strong isolate control pub const FirstStrongIsolate: BidiClass = BidiClass(19); /// (`LRI`) U+2066: the LR isolate control pub const LeftToRightIsolate: BidiClass = BidiClass(20); /// (`RLI`) U+2067: the RL isolate control pub const RightToLeftIsolate: BidiClass = BidiClass(21); /// (`PDI`) U+2069: terminates an isolate control pub const PopDirectionalIsolate: BidiClass = BidiClass(22); } } make_enumerated_property! { name: "Bidi_Class"; short_name: "bc"; ident: BidiClass; data_marker: crate::provider::PropertyEnumBidiClassV1; singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1; ule_ty: u8; } // This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props pub(crate) mod gc { /// Enumerated property General_Category. /// /// General_Category specifies the most general classification of a code point, usually /// determined based on the primary characteristic of the assigned character. For example, is the /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type? /// /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`). 
/// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`]( /// crate::props::GeneralCategoryGroup). /// /// # Example /// /// ``` /// use icu::properties::{props::GeneralCategory, CodePointMapData}; /// /// assert_eq!( /// CodePointMapData::::new().get('木'), /// GeneralCategory::OtherLetter /// ); // U+6728 /// assert_eq!( /// CodePointMapData::::new().get('🎃'), /// GeneralCategory::OtherSymbol /// ); // U+1F383 JACK-O-LANTERN /// ``` #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[cfg_attr(feature = "datagen", derive(databake::Bake))] #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))] #[allow(clippy::exhaustive_enums)] // this type is stable #[zerovec::make_ule(GeneralCategoryULE)] #[repr(u8)] pub enum GeneralCategory { /// (`Cn`) A reserved unassigned code point or a noncharacter Unassigned = 0, /// (`Lu`) An uppercase letter UppercaseLetter = 1, /// (`Ll`) A lowercase letter LowercaseLetter = 2, /// (`Lt`) A digraphic letter, with first part uppercase TitlecaseLetter = 3, /// (`Lm`) A modifier letter ModifierLetter = 4, /// (`Lo`) Other letters, including syllables and ideographs OtherLetter = 5, /// (`Mn`) A nonspacing combining mark (zero advance width) NonspacingMark = 6, /// (`Mc`) A spacing combining mark (positive advance width) SpacingMark = 8, /// (`Me`) An enclosing combining mark EnclosingMark = 7, /// (`Nd`) A decimal digit DecimalNumber = 9, /// (`Nl`) A letterlike numeric character LetterNumber = 10, /// (`No`) A numeric character of other type OtherNumber = 11, /// (`Zs`) A space character (of various non-zero widths) SpaceSeparator = 12, /// (`Zl`) U+2028 LINE SEPARATOR only LineSeparator = 13, /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only ParagraphSeparator = 14, /// (`Cc`) A C0 or C1 control code Control = 15, /// (`Cf`) A format control character Format = 16, /// (`Co`) A 
private-use character PrivateUse = 17, /// (`Cs`) A surrogate code point Surrogate = 18, /// (`Pd`) A dash or hyphen punctuation mark DashPunctuation = 19, /// (`Ps`) An opening punctuation mark (of a pair) OpenPunctuation = 20, /// (`Pe`) A closing punctuation mark (of a pair) ClosePunctuation = 21, /// (`Pc`) A connecting punctuation mark, like a tie ConnectorPunctuation = 22, /// (`Pi`) An initial quotation mark InitialPunctuation = 28, /// (`Pf`) A final quotation mark FinalPunctuation = 29, /// (`Po`) A punctuation mark of other type OtherPunctuation = 23, /// (`Sm`) A symbol of mathematical use MathSymbol = 24, /// (`Sc`) A currency sign CurrencySymbol = 25, /// (`Sk`) A non-letterlike modifier symbol ModifierSymbol = 26, /// (`So`) A symbol of other type OtherSymbol = 27, } } pub use gc::GeneralCategory; impl GeneralCategory { /// All possible values of this enum pub const ALL_VALUES: &'static [GeneralCategory] = &[ GeneralCategory::Unassigned, GeneralCategory::UppercaseLetter, GeneralCategory::LowercaseLetter, GeneralCategory::TitlecaseLetter, GeneralCategory::ModifierLetter, GeneralCategory::OtherLetter, GeneralCategory::NonspacingMark, GeneralCategory::SpacingMark, GeneralCategory::EnclosingMark, GeneralCategory::DecimalNumber, GeneralCategory::LetterNumber, GeneralCategory::OtherNumber, GeneralCategory::SpaceSeparator, GeneralCategory::LineSeparator, GeneralCategory::ParagraphSeparator, GeneralCategory::Control, GeneralCategory::Format, GeneralCategory::PrivateUse, GeneralCategory::Surrogate, GeneralCategory::DashPunctuation, GeneralCategory::OpenPunctuation, GeneralCategory::ClosePunctuation, GeneralCategory::ConnectorPunctuation, GeneralCategory::InitialPunctuation, GeneralCategory::FinalPunctuation, GeneralCategory::OtherPunctuation, GeneralCategory::MathSymbol, GeneralCategory::CurrencySymbol, GeneralCategory::ModifierSymbol, GeneralCategory::OtherSymbol, ]; } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)] /// Error 
value for `impl TryFrom for GeneralCategory`. #[non_exhaustive] pub struct GeneralCategoryOutOfBoundsError; impl TryFrom for GeneralCategory { type Error = GeneralCategoryOutOfBoundsError; /// Construct this [`GeneralCategory`] from an integer, returning /// an error if it is out of bounds fn try_from(val: u8) -> Result { GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError) } } make_enumerated_property! { name: "General_Category"; short_name: "gc"; ident: GeneralCategory; data_marker: crate::provider::PropertyEnumGeneralCategoryV1; singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1; } /// Groupings of multiple General_Category property values. /// /// Instances of `GeneralCategoryGroup` represent the defined multi-category /// values that are useful for users in certain contexts, such as regex. In /// other words, unlike [`GeneralCategory`], this supports groups of general /// categories: for example, `Letter` /// is the union of `UppercaseLetter`, /// `LowercaseLetter`, etc. /// /// See . /// /// The discriminants correspond to the `U_GC_XX_MASK` constants in ICU4C. /// Unlike [`GeneralCategory`], this supports groups of general categories: for example, `Letter` /// is the union of `UppercaseLetter`, `LowercaseLetter`, etc. /// /// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C. 
#[derive(Copy, Clone, PartialEq, Debug, Eq)] #[allow(clippy::exhaustive_structs)] // newtype #[repr(transparent)] pub struct GeneralCategoryGroup(pub(crate) u32); impl crate::private::Sealed for GeneralCategoryGroup {} use GeneralCategory as GC; use GeneralCategoryGroup as GCG; #[allow(non_upper_case_globals)] impl GeneralCategoryGroup { /// (`Lu`) An uppercase letter pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32)); /// (`Ll`) A lowercase letter pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32)); /// (`Lt`) A digraphic letter, with first part uppercase pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32)); /// (`Lm`) A modifier letter pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32)); /// (`Lo`) Other letters, including syllables and ideographs pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32)); /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32)) | (1 << (GC::LowercaseLetter as u32)) | (1 << (GC::TitlecaseLetter as u32))); /// (`L`) The union of all letter categories pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32)) | (1 << (GC::LowercaseLetter as u32)) | (1 << (GC::TitlecaseLetter as u32)) | (1 << (GC::ModifierLetter as u32)) | (1 << (GC::OtherLetter as u32))); /// (`Mn`) A nonspacing combining mark (zero advance width) pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32)); /// (`Mc`) A spacing combining mark (positive advance width) pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32)); /// (`Me`) An enclosing combining mark pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32)); /// (`M`) The union of all mark categories pub const Mark: 
GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32)) | (1 << (GC::EnclosingMark as u32)) | (1 << (GC::SpacingMark as u32))); /// (`Nd`) A decimal digit pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32)); /// (`Nl`) A letterlike numeric character pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32)); /// (`No`) A numeric character of other type pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32)); /// (`N`) The union of all number categories pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32)) | (1 << (GC::LetterNumber as u32)) | (1 << (GC::OtherNumber as u32))); /// (`Zs`) A space character (of various non-zero widths) pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32)); /// (`Zl`) U+2028 LINE SEPARATOR only pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32)); /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32)); /// (`Z`) The union of all separator categories pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32)) | (1 << (GC::LineSeparator as u32)) | (1 << (GC::ParagraphSeparator as u32))); /// (`Cc`) A C0 or C1 control code pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32)); /// (`Cf`) A format control character pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32)); /// (`Co`) A private-use character pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32)); /// (`Cs`) A surrogate code point pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32)); /// (`Cn`) A reserved unassigned code point or a noncharacter pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32)); /// (`C`) The union of all control code, reserved, and unassigned categories pub const Other: 
GeneralCategoryGroup = GCG((1 << (GC::Control as u32)) | (1 << (GC::Format as u32)) | (1 << (GC::PrivateUse as u32)) | (1 << (GC::Surrogate as u32)) | (1 << (GC::Unassigned as u32))); /// (`Pd`) A dash or hyphen punctuation mark pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32)); /// (`Ps`) An opening punctuation mark (of a pair) pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32)); /// (`Pe`) A closing punctuation mark (of a pair) pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32)); /// (`Pc`) A connecting punctuation mark, like a tie pub const ConnectorPunctuation: GeneralCategoryGroup = GCG(1 << (GC::ConnectorPunctuation as u32)); /// (`Pi`) An initial quotation mark pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32)); /// (`Pf`) A final quotation mark pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32)); /// (`Po`) A punctuation mark of other type pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32)); /// (`P`) The union of all punctuation categories pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32)) | (1 << (GC::OpenPunctuation as u32)) | (1 << (GC::ClosePunctuation as u32)) | (1 << (GC::ConnectorPunctuation as u32)) | (1 << (GC::OtherPunctuation as u32)) | (1 << (GC::InitialPunctuation as u32)) | (1 << (GC::FinalPunctuation as u32))); /// (`Sm`) A symbol of mathematical use pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32)); /// (`Sc`) A currency sign pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32)); /// (`Sk`) A non-letterlike modifier symbol pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32)); /// (`So`) A symbol of other type pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol 
as u32)); /// (`S`) The union of all symbol categories pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32)) | (1 << (GC::CurrencySymbol as u32)) | (1 << (GC::ModifierSymbol as u32)) | (1 << (GC::OtherSymbol as u32))); const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1; /// Return whether the code point belongs in the provided multi-value category. /// /// ``` /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; /// use icu::properties::CodePointMapData; /// /// let gc = CodePointMapData::::new(); /// /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter); /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A'))); /// /// // U+0B1E ORIYA LETTER NYA /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter); /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ'))); /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ'))); /// /// // U+0301 COMBINING ACUTE ACCENT /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark); /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}'))); /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}'))); /// /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber); /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0'))); /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0'))); /// /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation); /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('('))); /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('('))); /// /// // U+2713 CHECK MARK /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol); /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓'))); /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓'))); /// /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator); /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' '))); /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' '))); 
/// /// // U+E007F CANCEL TAG /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format); /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}'))); /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}'))); /// ``` pub const fn contains(self, val: GeneralCategory) -> bool { 0 != (1 << (val as u32)) & self.0 } /// Produce a GeneralCategoryGroup that is the inverse of this one /// /// # Example /// /// ```rust /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; /// /// let letter = GeneralCategoryGroup::Letter; /// let not_letter = letter.complement(); /// /// assert!(not_letter.contains(GeneralCategory::MathSymbol)); /// assert!(!letter.contains(GeneralCategory::MathSymbol)); /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation)); /// assert!(!letter.contains(GeneralCategory::OtherPunctuation)); /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter)); /// assert!(letter.contains(GeneralCategory::UppercaseLetter)); /// ``` pub const fn complement(self) -> Self { // Mask off things not in Self::ALL to guarantee the mask // values stay in-range GeneralCategoryGroup(!self.0 & Self::ALL) } /// Return the group representing all GeneralCategory values /// /// # Example /// /// ```rust /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; /// /// let all = GeneralCategoryGroup::all(); /// /// assert!(all.contains(GeneralCategory::MathSymbol)); /// assert!(all.contains(GeneralCategory::OtherPunctuation)); /// assert!(all.contains(GeneralCategory::UppercaseLetter)); /// ``` pub const fn all() -> Self { Self(Self::ALL) } /// Return the empty group /// /// # Example /// /// ```rust /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; /// /// let empty = GeneralCategoryGroup::empty(); /// /// assert!(!empty.contains(GeneralCategory::MathSymbol)); /// assert!(!empty.contains(GeneralCategory::OtherPunctuation)); /// 
assert!(!empty.contains(GeneralCategory::UppercaseLetter)); /// ``` pub const fn empty() -> Self { Self(0) } /// Take the union of two groups /// /// # Example /// /// ```rust /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; /// /// let letter = GeneralCategoryGroup::Letter; /// let symbol = GeneralCategoryGroup::Symbol; /// let union = letter.union(symbol); /// /// assert!(union.contains(GeneralCategory::MathSymbol)); /// assert!(!union.contains(GeneralCategory::OtherPunctuation)); /// assert!(union.contains(GeneralCategory::UppercaseLetter)); /// ``` pub const fn union(self, other: Self) -> Self { Self(self.0 | other.0) } /// Take the intersection of two groups /// /// # Example /// /// ```rust /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup}; /// /// let letter = GeneralCategoryGroup::Letter; /// let lu = GeneralCategoryGroup::UppercaseLetter; /// let intersection = letter.intersection(lu); /// /// assert!(!intersection.contains(GeneralCategory::MathSymbol)); /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation)); /// assert!(intersection.contains(GeneralCategory::UppercaseLetter)); /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter)); /// ``` pub const fn intersection(self, other: Self) -> Self { Self(self.0 & other.0) } } impl From for GeneralCategoryGroup { fn from(subcategory: GeneralCategory) -> Self { GeneralCategoryGroup(1 << (subcategory as u32)) } } impl From for GeneralCategoryGroup { fn from(mask: u32) -> Self { // Mask off things not in Self::ALL to guarantee the mask // values stay in-range GeneralCategoryGroup(mask & Self::ALL) } } impl From for u32 { fn from(group: GeneralCategoryGroup) -> Self { group.0 } } /// Enumerated property Script. /// /// This is used with both the Script and Script_Extensions Unicode properties. 
/// Each character is assigned a single Script, but characters that are used in /// a particular subset of scripts will be in more than one Script_Extensions set. /// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the /// Script_Extensions set for Dogra, Kaithi, and Mahajani. If you are trying to /// determine whether a code point belongs to a certain script, you should use /// [`ScriptWithExtensionsBorrowed::has_script`]. /// /// For more information, see UAX #24: . /// See `UScriptCode` in ICU4C. /// /// # Example /// /// ``` /// use icu::properties::{CodePointMapData, props::Script}; /// /// assert_eq!(CodePointMapData::