use core::ops::RangeInclusive; use winnow::stream::ContainsToken as _; use winnow::stream::Offset as _; use winnow::stream::Stream as _; use crate::decoder::StringBuilder; use crate::lexer::APOSTROPHE; use crate::lexer::ML_BASIC_STRING_DELIM; use crate::lexer::ML_LITERAL_STRING_DELIM; use crate::lexer::QUOTATION_MARK; use crate::lexer::WSCHAR; use crate::ErrorSink; use crate::Expected; use crate::ParseError; use crate::Raw; use crate::Span; const ALLOCATION_ERROR: &str = "could not allocate for string"; /// Parse literal string /// /// ```bnf /// ;; Literal String /// /// literal-string = apostrophe *literal-char apostrophe /// /// apostrophe = %x27 ; ' apostrophe /// /// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii /// ``` pub(crate) fn decode_literal_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid literal string"; output.clear(); let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(APOSTROPHE as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(0, 0)), ); s }; let s = if let Some(stripped) = s.strip_suffix(APOSTROPHE as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s }; for (i, b) in s.as_bytes().iter().enumerate() { if !LITERAL_CHAR.contains_token(b) { let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes()); error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("non-single-quote visible characters")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); } } if !output.push_str(s) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } } /// `literal-char = %x09 / %x20-26 / %x28-7E / non-ascii` const LITERAL_CHAR: ( u8, RangeInclusive, RangeInclusive, RangeInclusive, ) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII); /// `non-ascii = %x80-D7FF / %xE000-10FFFF` /// - ASCII is 0xxxxxxx /// - First byte for UTF-8 is 11xxxxxx /// - Subsequent UTF-8 bytes are 10xxxxxx const NON_ASCII: RangeInclusive = 0x80..=0xff; /// Parse multi-line literal string /// /// ```bnf /// ;; Multiline Literal String /// /// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body /// ml-literal-string-delim /// ml-literal-string-delim = 3apostrophe /// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ] /// /// mll-content = mll-char / newline /// mll-quotes = 1*2apostrophe /// ``` pub(crate) fn decode_ml_literal_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid multi-line literal string"; output.clear(); let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(ML_LITERAL_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(0, 0)), ); s }; let s = strip_start_newline(s); let s = if let Some(stripped) = s.strip_suffix(ML_LITERAL_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("'")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s.trim_end_matches('\'') }; for (i, b) in s.as_bytes().iter().enumerate() { if *b == b'\'' || *b == b'\n' { } else if *b == b'\r' { if s.as_bytes().get(i + 1) != Some(&b'\n') { let offset = (&s.as_bytes()[i + 1..]).offset_from(&raw.as_bytes()); error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); } } else if !MLL_CHAR.contains_token(b) { let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes()); error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("non-single-quote characters")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); } } if !output.push_str(s) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } } /// `mll-char = %x09 / %x20-26 / %x28-7E / non-ascii` const MLL_CHAR: ( u8, RangeInclusive, RangeInclusive, RangeInclusive, ) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII); /// Parse basic string /// /// ```bnf /// ;; Basic String /// /// basic-string = quotation-mark *basic-char quotation-mark /// /// basic-char = basic-unescaped / escaped /// /// escaped = escape escape-seq-char /// ``` pub(crate) fn decode_basic_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid basic string"; output.clear(); let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(QUOTATION_MARK as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) .with_unexpected(Span::new_unchecked(0, 0)), ); s }; let mut s = if let Some(stripped) = s.strip_suffix(QUOTATION_MARK as char) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s }; let segment = basic_unescaped(&mut s); if !output.push_str(segment) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } while !s.is_empty() { if s.starts_with("\\") { let _ = s.next_token(); let c = escape_seq_char(&mut s, raw, error); if !output.push_char(c) { error.report_error( ParseError::new(ALLOCATION_ERROR) .with_unexpected(Span::new_unchecked(0, raw.len())), ); } } else { let invalid = basic_invalid(&mut s); let start = invalid.offset_from(&raw.as_str()); let end = start + invalid.len(); error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[ Expected::Description("non-double-quote visible characters"), Expected::Literal("\\"), ]) .with_unexpected(Span::new_unchecked(start, end)), ); let _ = output.push_str(invalid); } let segment = basic_unescaped(&mut s); if !output.push_str(segment) { let start = segment.offset_from(&raw.as_str()); let end = start + segment.len(); error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)), ); } } } /// `basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii` fn basic_unescaped<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| !BASIC_UNESCAPED.contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset) } fn basic_invalid<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| (BASIC_UNESCAPED, ESCAPE).contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset) } /// `basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii` #[allow(clippy::type_complexity)] const BASIC_UNESCAPED: ( (u8, u8), u8, RangeInclusive, RangeInclusive, RangeInclusive, ) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII); /// `escape = %x5C ; \` const ESCAPE: u8 = b'\\'; /// ```bnf /// escape-seq-char = %x22 ; " quotation mark U+0022 /// escape-seq-char =/ %x5C ; \ reverse solidus U+005C /// escape-seq-char =/ %x62 ; b backspace U+0008 /// escape-seq-char =/ %x66 ; f form feed U+000C /// escape-seq-char =/ %x6E ; n line feed U+000A /// escape-seq-char =/ %x72 ; r carriage return U+000D /// escape-seq-char =/ %x74 ; t tab U+0009 /// escape-seq-char =/ %x75 4HEXDIG ; uXXXX U+XXXX /// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX U+XXXXXXXX /// ``` fn escape_seq_char(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) -> char { const EXPECTED_ESCAPES: &[Expected] = &[ Expected::Literal("b"), Expected::Literal("f"), Expected::Literal("n"), Expected::Literal("r"), Expected::Literal("\\"), Expected::Literal("\""), Expected::Literal("u"), Expected::Literal("U"), ]; let start = stream.checkpoint(); let Some(id) = stream.next_token() else { let offset = stream.offset_from(&raw.as_str()); error.report_error( ParseError::new("missing escaped value") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(EXPECTED_ESCAPES) .with_unexpected(Span::new_unchecked(offset, offset)), ); return '\\'; }; match id { 'b' => '\u{8}', 'f' => '\u{c}', 'n' => '\n', 'r' => '\r', 't' => '\t', 'u' => hexescape(stream, 4, raw, error), 'U' => hexescape(stream, 8, raw, error), '\\' => '\\', '"' => '"', _ => { stream.reset(&start); let offset = stream.offset_from(&raw.as_str()); error.report_error( ParseError::new("missing escaped value") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(EXPECTED_ESCAPES) .with_unexpected(Span::new_unchecked(offset, offset)), ); '\\' } } } fn hexescape( stream: &mut &str, num_digits: usize, raw: Raw<'_>, error: &mut dyn ErrorSink, ) -> char { let offset = stream .as_bytes() .offset_for(|b| !HEXDIG.contains_token(b)) .unwrap_or_else(|| stream.eof_offset()) .min(num_digits); #[cfg(feature = "unsafe")] // SAFETY: HEXDIG ensure `offset` is along UTF-8 boundary let value = unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] let value = stream.next_slice(offset); if value.len() != num_digits { let offset = stream.offset_from(&raw.as_str()); error.report_error( ParseError::new("too few unicode value digits") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("unicode hexadecimal value")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); return '�'; } let Some(value) = u32::from_str_radix(value, 16).ok().and_then(char::from_u32) else { let offset = value.offset_from(&raw.as_str()); error.report_error( ParseError::new("invalid value") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Description("unicode hexadecimal value")]) .with_unexpected(Span::new_unchecked(offset, offset)), ); return '�'; }; value } /// `HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"` const HEXDIG: (RangeInclusive, RangeInclusive, RangeInclusive) = (DIGIT, b'A'..=b'F', b'a'..=b'f'); /// `DIGIT = %x30-39 ; 0-9` const DIGIT: RangeInclusive = b'0'..=b'9'; fn strip_start_newline(s: &str) -> &str { s.strip_prefix('\n') .or_else(|| s.strip_prefix("\r\n")) .unwrap_or(s) } /// Parse multi-line basic string /// /// ```bnf /// ;; Multiline Basic String /// /// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body /// ml-basic-string-delim /// ml-basic-string-delim = 3quotation-mark /// /// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] /// /// mlb-content = mlb-char / newline / mlb-escaped-nl /// mlb-char = mlb-unescaped / escaped /// mlb-quotes = 1*2quotation-mark /// ``` pub(crate) fn decode_ml_basic_string<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { const INVALID_STRING: &str = "invalid multi-line basic string"; let s = raw.as_str(); let s = if let Some(stripped) = s.strip_prefix(ML_BASIC_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) .with_unexpected(Span::new_unchecked(0, 0)), ); s }; let s = strip_start_newline(s); let mut s = if let Some(stripped) = s.strip_suffix(ML_BASIC_STRING_DELIM) { stripped } else { error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\"")]) .with_unexpected(Span::new_unchecked(raw.len(), raw.len())), ); s }; let segment = mlb_unescaped(&mut s); if !output.push_str(segment) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } while !s.is_empty() { if s.starts_with("\\") { let _ = s.next_token(); if s.as_bytes() .first() .map(|b| (WSCHAR, b'\r', b'\n').contains_token(b)) .unwrap_or(false) { mlb_escaped_nl(&mut s, raw, error); } else { let c = escape_seq_char(&mut s, raw, error); if !output.push_char(c) { error.report_error( ParseError::new(ALLOCATION_ERROR) .with_unexpected(Span::new_unchecked(0, raw.len())), ); } } } else if s.starts_with("\r") { let offset = if s.starts_with("\r\n") { "\r\n".len() } else { let start = s.offset_from(&raw.as_str()) + 1; error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, start)), ); "\r".len() }; #[cfg(feature = "unsafe")] // SAFETY: Newlines ensure `offset` is along UTF-8 boundary let newline = unsafe { s.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] let newline = s.next_slice(offset); if !output.push_str(newline) { let start = newline.offset_from(&raw.as_str()); let end = start + newline.len(); error.report_error( ParseError::new(ALLOCATION_ERROR) .with_unexpected(Span::new_unchecked(start, end)), ); } } else { let invalid = mlb_invalid(&mut s); let start = invalid.offset_from(&raw.as_str()); let end = start + invalid.len(); error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\\"), Expected::Description("characters")]) .with_unexpected(Span::new_unchecked(start, end)), ); let _ = output.push_str(invalid); } let segment = mlb_unescaped(&mut s); if !output.push_str(segment) { let start = segment.offset_from(&raw.as_str()); let end = start + segment.len(); error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)), ); } } } /// ```bnf /// mlb-escaped-nl = escape ws newline *( wschar / newline ) /// ``` fn mlb_escaped_nl(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) { const INVALID_STRING: &str = "invalid multi-line basic string"; let ws_offset = stream .as_bytes() .offset_for(|b| !WSCHAR.contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(ws_offset); } #[cfg(not(feature = "unsafe"))] stream.next_slice(ws_offset); let start = stream.checkpoint(); match stream.next_token() { Some('\n') => {} Some('\r') => { if stream.as_bytes().first() == Some(&b'\n') { let _ = stream.next_token(); } else { let start = stream.offset_from(&raw.as_str()); let end = start; error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, end)), ); } } _ => { stream.reset(&start); let start = stream.offset_from(&raw.as_str()); let end = start; error.report_error( ParseError::new(INVALID_STRING) .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, end)), ); } } loop { let start_offset = stream.offset_from(&raw.as_str()); let offset = stream .as_bytes() .offset_for(|b| !(WSCHAR, b'\n').contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset); } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset); if stream.starts_with("\r") { let offset = if stream.starts_with("\r\n") { "\r\n".len() } else { let start = stream.offset_from(&raw.as_str()) + 1; error.report_error( ParseError::new("carriage return must be followed by newline") .with_context(Span::new_unchecked(0, raw.len())) .with_expected(&[Expected::Literal("\n")]) .with_unexpected(Span::new_unchecked(start, start)), ); "\r".len() }; #[cfg(feature = "unsafe")] // SAFETY: Newlines ensure `offset` is along UTF-8 boundary let _ = unsafe { stream.next_slice_unchecked(offset) }; #[cfg(not(feature = "unsafe"))] let _ = stream.next_slice(offset); } let end_offset = stream.offset_from(&raw.as_str()); if start_offset == end_offset { break; } } } /// `mlb-unescaped` extended with `mlb-quotes` and `LF` /// /// **warning:** `newline` is not validated /// /// ```bnf /// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ] /// /// mlb-content = mlb-char / newline / mlb-escaped-nl /// mlb-char = mlb-unescaped / escaped /// mlb-quotes = 1*2quotation-mark /// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii /// ``` fn mlb_unescaped<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| !(MLB_UNESCAPED, b'"', b'\n').contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset) } fn mlb_invalid<'i>(stream: &mut &'i str) -> &'i str { let offset = stream .as_bytes() .offset_for(|b| (MLB_UNESCAPED, b'"', b'\n', ESCAPE, '\r').contains_token(b)) .unwrap_or(stream.len()); #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary unsafe { stream.next_slice_unchecked(offset) } #[cfg(not(feature = "unsafe"))] stream.next_slice(offset) } /// `mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii` #[allow(clippy::type_complexity)] const MLB_UNESCAPED: ( (u8, u8), u8, RangeInclusive, RangeInclusive, RangeInclusive, ) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII); /// Parse unquoted key /// /// ```bnf /// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _ /// ``` pub(crate) fn decode_unquoted_key<'i>( raw: Raw<'i>, output: &mut dyn StringBuilder<'i>, error: &mut dyn ErrorSink, ) { let s = raw.as_str(); if s.is_empty() { error.report_error( ParseError::new("unquoted keys cannot be empty") .with_context(Span::new_unchecked(0, s.len())) .with_expected(&[ Expected::Description("letters"), Expected::Description("numbers"), Expected::Literal("-"), Expected::Literal("_"), ]) .with_unexpected(Span::new_unchecked(0, s.len())), ); } for (i, b) in s.as_bytes().iter().enumerate() { if !UNQUOTED_CHAR.contains_token(b) { error.report_error( ParseError::new("invalid unquoted key") .with_context(Span::new_unchecked(0, s.len())) .with_expected(&[ Expected::Description("letters"), Expected::Description("numbers"), Expected::Literal("-"), Expected::Literal("_"), ]) .with_unexpected(Span::new_unchecked(i, i)), ); } } if !output.push_str(s) { error.report_error( ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())), ); } } /// `unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _` const UNQUOTED_CHAR: ( RangeInclusive, RangeInclusive, RangeInclusive, u8, u8, ) = (b'A'..=b'Z', b'a'..=b'z', b'0'..=b'9', b'-', b'_'); #[cfg(test)] #[cfg(feature = "std")] mod test { use super::*; use crate::decoder::Encoding; use alloc::borrow::Cow; use snapbox::assert_data_eq; use snapbox::prelude::*; use snapbox::str; #[test] fn literal_string() { let cases = [ ( r"'C:\Users\nodejs\templates'", str![[r#"C:\Users\nodejs\templates"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r"'\\ServerX\admin$\system32\'", str![[r#"\\ServerX\admin$\system32\"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"'Tom "Dubs" Preston-Werner'"#, str![[r#"Tom "Dubs" Preston-Werner"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r"'<\i\c*\s*>'", str![[r#"<\i\c*\s*>"#]].raw(), str![[r#" [] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_literal_string( Raw::new_unchecked(input, Some(Encoding::LiteralString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn ml_literal_string() { let cases = [ ( r"'''I [dw]on't need \d{2} apples'''", str![[r#"I [dw]on't need \d{2} apples"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"''''one_quote''''"#, str!["'one_quote'"].raw(), str![[r#" [] "#]] .raw(), ), ( r#"''' The first newline is trimmed in raw strings. All other whitespace is preserved. '''"#, str![[r#" The first newline is trimmed in raw strings. All other whitespace is preserved. "#]] .raw(), str![[r#" [] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_ml_literal_string( Raw::new_unchecked(input, Some(Encoding::MlLiteralString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn basic_string() { let cases = [ ( r#""""#, str![""].raw(), str![[r#" [] "#]] .raw(), ), ( r#""content\"trailing""#, str![[r#"content"trailing"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#""content\""#, str![[r#"content\"#]].raw(), str![[r#" [ ParseError { context: Some( 0..10, ), description: "missing escaped value", expected: Some( [ Literal( "b", ), Literal( "f", ), Literal( "n", ), Literal( "r", ), Literal( "\\", ), Literal( "\"", ), Literal( "u", ), Literal( "U", ), ], ), unexpected: Some( 9..9, ), }, ] "#]] .raw(), ), ( r#""content trailing""#, str![[r#" content trailing "#]] .raw(), str![[r#" [ ParseError { context: Some( 0..18, ), description: "invalid basic string", expected: Some( [ Description( "non-double-quote visible characters", ), Literal( "\\", ), ], ), unexpected: Some( 8..9, ), }, ] "#]] .raw(), ), ( r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#, str![[r#" I'm a string. "You can quote me". Name José Location SF. 𠜎 "#]] .raw(), str![[r#" [] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_basic_string( Raw::new_unchecked(input, Some(Encoding::BasicString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn ml_basic_string() { let cases = [ ( r#"""" Roses are red Violets are blue""""#, str![[r#" Roses are red Violets are blue "#]] .raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" \""" """"#, str![[r#" """ "#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" \\""""#, str![[r#" \"#]].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" The quick brown \ fox jumps over \ the lazy dog.""""#, str!["The quick brown fox jumps over the lazy dog."].raw(), str![[r#" [] "#]] .raw(), ), ( r#""""\ The quick brown \ fox jumps over \ the lazy dog.\ """"#, str!["The quick brown fox jumps over the lazy dog."].raw(), str![[r#" [] "#]] .raw(), ), ( r#""""\ """"#, str![""].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" \ \ """"#, str![""].raw(), str![[r#" [] "#]] .raw(), ), ( r#"""" """#, str![[r#" """#]].raw(), str![[r#" [ ParseError { context: Some( 0..7, ), description: "invalid multi-line basic string", expected: Some( [ Literal( "\"", ), ], ), unexpected: Some( 7..7, ), }, ] "#]] .raw(), ), ( r#"""" \""""#, str![[r#" \"#]].raw(), str![[r#" [ ParseError { context: Some( 0..9, ), description: "missing escaped value", expected: Some( [ Literal( "b", ), Literal( "f", ), Literal( "n", ), Literal( "r", ), Literal( "\\", ), Literal( "\"", ), Literal( "u", ), Literal( "U", ), ], ), unexpected: Some( 6..6, ), }, ] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_ml_basic_string( Raw::new_unchecked(input, Some(Encoding::MlBasicString), Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } #[test] fn unquoted_keys() { let cases = [ ( "a", str!["a"].raw(), str![[r#" [] "#]] .raw(), ), ( "hello", str!["hello"].raw(), str![[r#" [] "#]] .raw(), ), ( "-", str!["-"].raw(), str![[r#" [] "#]] .raw(), ), ( "_", str!["_"].raw(), str![[r#" [] "#]] .raw(), ), ( "-hello-world-", str!["-hello-world-"].raw(), str![[r#" [] "#]] .raw(), ), ( "_hello_world_", str!["_hello_world_"].raw(), str![[r#" [] "#]] .raw(), ), ( "", str![""].raw(), str![[r#" [ ParseError { context: Some( 0..0, ), description: "unquoted keys cannot be empty", expected: Some( [ Description( "letters", ), Description( "numbers", ), Literal( "-", ), Literal( "_", ), ], ), unexpected: Some( 0..0, ), }, ] "#]] .raw(), ), ]; for (input, expected, expected_error) in cases { let mut error = Vec::new(); let mut actual = Cow::Borrowed(""); decode_unquoted_key( Raw::new_unchecked(input, None, Default::default()), &mut actual, &mut error, ); assert_data_eq!(actual.as_ref(), expected); assert_data_eq!(error.to_debug(), expected_error); } } }