// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! Core functionality for `ixdtf`'s parsers
use crate::{ParseError, ParserResult};
/// Private module holding the supertrait used to seal the encoding trait.
///
/// The module itself is not `pub`, so `Sealed` can only be named (and therefore
/// implemented) where this module is reachable.
mod private {
    /// Supertrait with no items; implementing it is what opts a type in.
    pub trait Sealed {}
}
/// A trait for defining various supported encodings
/// and implementing functionality that is encoding
/// sensitive / specific.
pub trait EncodingType: private::Sealed {
type CodeUnit: PartialEq + core::fmt::Debug + Clone;
/// Get a slice from the underlying source using for start..end
fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]>;
/// Retrieve the provided code unit index and returns the value as an ASCII byte
/// or None if the value is not ASCII representable.
fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult>;
/// Checks for the known calendar annotation key `u-ca`.
fn check_calendar_key(key: &[Self::CodeUnit]) -> bool;
}
/// A marker type that signals a parser should parse the source as UTF-16 bytes.
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::exhaustive_structs)] // ZST Marker trait, no fields should be added
pub struct Utf16;
impl private::Sealed for Utf16 {}
impl EncodingType for Utf16 {
type CodeUnit = u16;
fn slice(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
source.get(start..end)
}
fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult > {
source.get(index).copied().map(to_ascii_byte).transpose()
}
fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
key == [0x75, 0x2d, 0x63, 0x61]
}
}
/// Narrows a UTF-16 code unit to a single byte.
///
/// Code units in `0x01..=0x7E` are returned unchanged as a `u8`.
///
/// # Errors
/// Returns `ParseError::NonAsciiCodePoint` for every other value.
///
/// NOTE(review): `0x00` and `0x7F` are ASCII but fall outside the accepted
/// range and are rejected — confirm this is intentional.
#[inline]
fn to_ascii_byte(b: u16) -> ParserResult<u8> {
    match b {
        // The pattern bounds the value below 0x80, so the cast cannot truncate.
        0x01..=0x7E => Ok(b as u8),
        _ => Err(ParseError::NonAsciiCodePoint),
    }
}
/// A marker type that signals a parser should parse the source as UTF-8 bytes.
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::exhaustive_structs)] // ZST Marker trait, no fields should be added.
pub struct Utf8;
impl private::Sealed for Utf8 {}
impl EncodingType for Utf8 {
type CodeUnit = u8;
fn slice<'a>(source: &[Self::CodeUnit], start: usize, end: usize) -> Option<&[Self::CodeUnit]> {
source.get(start..end)
}
fn get_ascii(source: &[Self::CodeUnit], index: usize) -> ParserResult> {
Ok(source.get(index).copied())
}
fn check_calendar_key(key: &[Self::CodeUnit]) -> bool {
key == "u-ca".as_bytes()
}
}
// ==== Mini cursor implementation for Iso8601 targets ====
/// `Cursor` is a small cursor implementation for parsing Iso8601 grammar.
#[derive(Debug)]
pub(crate) struct Cursor<'a, T: EncodingType> {
pos: usize,
source: &'a [T::CodeUnit],
}
impl<'a, T: EncodingType> Cursor<'a, T> {
/// Create a new cursor from a source UTF8 string.
#[must_use]
pub fn new(source: &'a [T::CodeUnit]) -> Self {
Self { pos: 0, source }
}
/// Returns a string value from a slice of the cursor.
pub(crate) fn slice(&self, start: usize, end: usize) -> Option<&'a [T::CodeUnit]> {
T::slice(self.source, start, end)
}
/// Get current position
pub(crate) const fn pos(&self) -> usize {
self.pos
}
/// Get current position
pub(crate) fn set_position(&mut self, pos: usize) {
self.pos = pos;
}
/// Peek the value at next position (current + 1).
pub(crate) fn peek(&self) -> ParserResult > {
self.peek_n(1)
}
/// Returns current position in source as `char`.
pub(crate) fn current(&self) -> ParserResult > {
self.peek_n(0)
}
/// Peeks the value at `n` as a `char`.
pub(crate) fn peek_n(&self, n: usize) -> ParserResult > {
T::get_ascii(self.source, self.pos + n)
}
/// Runs the provided check on the current position.
pub(crate) fn check(&self, f: F) -> ParserResult>
where
F: FnOnce(u8) -> bool,
{
Ok(self.current()?.map(f))
}
/// Runs the provided check on current position returns the default value if None.
pub(crate) fn check_or(&self, default: bool, f: F) -> ParserResult
where
F: FnOnce(u8) -> bool,
{
Ok(self.current()?.map_or(default, f))
}
/// Returns `Cursor`'s current char and advances to the next position.
pub(crate) fn next(&mut self) -> ParserResult> {
let result = self.current();
self.advance_n(1);
result
}
/// Returns the next value as a digit
///
/// # Errors
/// - Returns an AbruptEnd error if cursor ends.
pub(crate) fn next_digit(&mut self) -> ParserResult > {
let ascii_char = self.next_or(ParseError::AbruptEnd { location: "digit" })?;
if ascii_char.is_ascii_digit() {
Ok(Some(ascii_char - 48))
} else {
Ok(None)
}
}
/// A utility next method that returns an `AbruptEnd` error if invalid.
pub(crate) fn next_or(&mut self, err: ParseError) -> ParserResult {
self.next()?.ok_or(err)
}
/// Advances the cursor's position by n code points.
pub(crate) fn advance_n(&mut self, n: usize) {
self.pos += n;
}
// Advances the cursor by 1 code point.
pub(crate) fn advance(&mut self) {
self.advance_n(1)
}
/// Utility function to advance when a condition is true
pub(crate) fn advance_if(&mut self, condition: bool) {
if condition {
self.advance();
}
}
/// Closes the current cursor by checking if all contents have been consumed. If not, returns an error for invalid syntax.
pub(crate) fn close(&mut self) -> ParserResult<()> {
if self.pos < self.source.len() {
return Err(ParseError::InvalidEnd);
}
Ok(())
}
}