#![allow(missing_copy_implementations)] //! Macro for making a static NSString. //! //! This closely follows what clang does, see: //! - Apple: //! - GNUStep 2.0 (not yet supported): //! - Other (not yet supported): //! //! Note that this uses the `CFString` static, while `clang` has support for //! generating a pure `NSString`. We don't support that yet (since I don't //! know the use-case), but we definitely could! //! See: //! //! See also the following crates that implement UTF-16 conversion: //! `utf16_lit`, `windows`, `const_utf16`, `wide-literals`, ... use core::ffi::c_void; use crate::NSString; use objc2::runtime::AnyClass; // This is defined in CoreFoundation, but we don't emit a link attribute // here because it is already linked via Foundation. // // Although this is a "private" (underscored) symbol, it is directly // referenced in Objective-C binaries. So it's safe for us to reference. extern "C" { pub static __CFConstantStringClassReference: AnyClass; } /// Structure used to describe a constant `CFString`. /// /// This struct is the same as [`CF_CONST_STRING`], which contains /// [`CFRuntimeBase`]. While the documentation clearly says that the ABI of /// `CFRuntimeBase` should not be relied on, we can rely on it as long as we /// only do it with regards to `CFString` (because `clang` does this as well). /// /// [`CFRuntimeBase`]: /// [`CF_CONST_STRING`]: #[repr(C)] #[derive(Debug)] pub struct CFConstString { isa: &'static AnyClass, // Important that we don't use `usize` here, since that would be wrong on // big-endian systems! cfinfo: u32, #[cfg(target_pointer_width = "64")] _rc: u32, data: *const c_void, len: usize, } // Required to place in a `static`. unsafe impl Sync for CFConstString {} impl CFConstString { // From `CFString.c`: // // > !!! 
Note: Constant CFStrings use the bit patterns: // > C8 (11001000 = default allocator, not inline, not freed contents; 8-bit; has NULL byte; doesn't have length; is immutable) // > D0 (11010000 = default allocator, not inline, not freed contents; Unicode; is immutable) // > The bit usages should not be modified in a way that would effect these bit patterns. // // Hence CoreFoundation guarantees that these two are always valid. // // The `CFTypeID` of `CFStringRef` is guaranteed to always be 7: // const FLAGS_ASCII: u32 = 0x07_C8; const FLAGS_UTF16: u32 = 0x07_D0; pub const unsafe fn new_ascii(isa: &'static AnyClass, data: &'static [u8]) -> Self { Self { isa, cfinfo: Self::FLAGS_ASCII, #[cfg(target_pointer_width = "64")] _rc: 0, data: data.as_ptr().cast(), // The length does not include the trailing NUL. len: data.len() - 1, } } pub const unsafe fn new_utf16(isa: &'static AnyClass, data: &'static [u16]) -> Self { Self { isa, cfinfo: Self::FLAGS_UTF16, #[cfg(target_pointer_width = "64")] _rc: 0, data: data.as_ptr().cast(), // The length does not include the trailing NUL. len: data.len() - 1, } } #[inline] pub const fn as_nsstring_const(&self) -> &NSString { let ptr: *const Self = self; unsafe { &*ptr.cast::() } } // This is deliberately not `const` to prevent the result from being used // in other statics, since that is only possible if the // `unstable-static-nsstring` feature is enabled. #[inline] pub fn as_nsstring(&self) -> &NSString { self.as_nsstring_const() } } /// Returns `true` if `bytes` is entirely ASCII with no interior NULs. 
pub const fn is_ascii_no_nul(bytes: &[u8]) -> bool {
    // Index-based loop: `for` loops cannot be used inside a `const fn`.
    let mut i = 0;
    while i < bytes.len() {
        let byte = bytes[i];
        if !byte.is_ascii() || byte == b'\0' {
            return false;
        }
        i += 1;
    }
    true
}

/// One code point encoded as UTF-16.
///
/// Only the first `len` elements of `repr` are meaningful; `len` is `1` for
/// code points up to `0xffff` and `2` (a surrogate pair) otherwise.
#[derive(Debug)]
pub struct Utf16Char {
    pub repr: [u16; 2],
    pub len: usize,
}

impl Utf16Char {
    /// Encodes the code point `ch` as one or two UTF-16 code units.
    const fn encode(ch: u32) -> Self {
        if ch <= 0xffff {
            // Fits in a single code unit; the second slot is unused.
            Self {
                repr: [ch as u16, 0],
                len: 1,
            }
        } else {
            // Split the 20-bit payload into a high and a low surrogate.
            let payload = ch - 0x10000;
            let hi = (payload >> 10) | 0xd800;
            let lo = (payload & 0x3ff) | 0xdc00;
            Self {
                repr: [hi as u16, lo as u16],
                len: 2,
            }
        }
    }

    /// Returns the meaningful code units (test-only helper).
    #[cfg(test)]
    fn as_slice(&self) -> &[u16] {
        &self.repr[..self.len]
    }
}

/// `const`-compatible iterator that decodes UTF-8 bytes and yields each code
/// point re-encoded as UTF-16.
///
/// Instead of mutating in place, [`EncodeUtf16Iter::next`] consumes the
/// iterator and hands back the advanced iterator together with the item.
#[derive(Debug)]
pub struct EncodeUtf16Iter {
    str: &'static [u8],
    index: usize,
}

impl EncodeUtf16Iter {
    /// Creates an iterator positioned at the start of `str`.
    pub const fn new(str: &'static [u8]) -> Self {
        Self { str, index: 0 }
    }

    /// Decodes the next code point.
    ///
    /// Returns `None` once the end of the input has been reached, otherwise
    /// the advanced iterator and the UTF-16 encoding of the decoded code
    /// point.
    ///
    /// # Panics
    ///
    /// Panics if the bytes at the current position do not start a UTF-8
    /// sequence, or if the sequence extends past the end of the input.
    pub const fn next(self) -> Option<(Self, Utf16Char)> {
        if self.index >= self.str.len() {
            None
        } else {
            let (index, ch) = decode_utf8(self.str, self.index);
            Some((Self { index, ..self }, Utf16Char::encode(ch)))
        }
    }
}

/// Decodes the UTF-8 sequence starting at `s[i]`.
///
/// `(&str bytes, index) -> (new index, decoded char)`
///
/// The sequence length is taken from the lead byte only; continuation bytes
/// are masked but not validated.
///
/// # Panics
///
/// Panics if `s[i]` is a continuation byte or a never-valid byte, or if the
/// sequence indicated by the lead byte runs past the end of `s`.
const fn decode_utf8(s: &[u8], i: usize) -> (usize, u32) {
    let b0 = s[i];
    match b0 {
        // one-byte seq
        0b0000_0000..=0b0111_1111 => {
            let decoded = b0 as u32;
            (i + 1, decoded)
        }
        // two-byte seq
        0b1100_0000..=0b1101_1111 => {
            let decoded = ((b0 as u32 & 0x1f) << 6) | (s[i + 1] as u32 & 0x3f);
            (i + 2, decoded)
        }
        // 3 byte seq
        0b1110_0000..=0b1110_1111 => {
            let decoded = ((b0 as u32 & 0x0f) << 12)
                | ((s[i + 1] as u32 & 0x3f) << 6)
                | (s[i + 2] as u32 & 0x3f);
            (i + 3, decoded)
        }
        // 4 byte seq
        0b1111_0000..=0b1111_0111 => {
            let decoded = ((b0 as u32 & 0x07) << 18)
                | ((s[i + 1] as u32 & 0x3f) << 12)
                | ((s[i + 2] as u32 & 0x3f) << 6)
                | (s[i + 3] as u32 & 0x3f);
            (i + 4, decoded)
        }
        // continuation bytes, or never-valid bytes.
        0b1000_0000..=0b1011_1111 | 0b1111_1000..=0b1111_1111 => {
            panic!("Encountered invalid bytes")
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_is_ascii() {
        assert!(is_ascii_no_nul(b"a"));
        assert!(is_ascii_no_nul(b"abc"));

        assert!(!is_ascii_no_nul(b"\xff"));

        assert!(!is_ascii_no_nul(b"\0"));
        assert!(!is_ascii_no_nul(b"a\0b"));
        assert!(!is_ascii_no_nul(b"ab\0"));
        assert!(!is_ascii_no_nul(b"a\0b\0"));
    }

    #[test]
    #[ignore = "slow, enable this if working on ns_string!"]
    fn test_decode_utf8() {
        for c in '\u{0}'..=char::MAX {
            let mut buf;
            for off in 0..4 {
                // Ensure we see garbage if we read outside bounds.
                buf = [0xff; 8];
                let len = c.encode_utf8(&mut buf[off..(off + 4)]).len();
                let (end_idx, decoded) = decode_utf8(&buf, off);
                assert_eq!(
                    (end_idx, decoded),
                    (off + len, c as u32),
                    "failed for U+{code:04X} ({ch:?}) encoded as {buf:#x?} over {range:?}",
                    code = c as u32,
                    ch = c,
                    buf = &buf[off..(off + len)],
                    range = off..(off + len),
                );
            }
        }
    }

    #[test]
    #[ignore = "slow, enable this if working on ns_string!"]
    fn encode_utf16() {
        for c in '\u{0}'..=char::MAX {
            assert_eq!(
                c.encode_utf16(&mut [0u16; 2]),
                Utf16Char::encode(c as u32).as_slice(),
                "failed for U+{:04X} ({:?})",
                c as u32,
                c
            );
        }
    }
}