use super::*;
use snapbox::assert_data_eq;
use snapbox::prelude::*;
use snapbox::str;

#[test]
fn test_lex_ascii_char() {
    let cases = [(
        ".trailing",
        str![[r#"
Token {
    kind: Dot,
    span: 0..1,
}

"#]]
        .raw(),
        str!["trailing"].raw(),
    )];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_ascii_char(&mut stream, TokenKind::Dot);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}

#[test]
fn test_lex_whitespace() {
    let cases = [
        (
            " ",
            str![[r#"
Token {
    kind: Whitespace,
    span: 0..1,
}

"#]]
            .raw(),
            str![].raw(),
        ),
        (
            " \t \t \t ",
            str![[r#"
Token {
    kind: Whitespace,
    span: 0..9,
}

"#]]
            .raw(),
            str![].raw(),
        ),
        (
            " \n",
            str![[r#"
Token {
    kind: Whitespace,
    span: 0..1,
}

"#]]
            .raw(),
            str![[r#"


"#]]
            .raw(),
        ),
        (
            " #",
            str![[r#"
Token {
    kind: Whitespace,
    span: 0..1,
}

"#]]
            .raw(),
            str!["#"].raw(),
        ),
        (
            " a",
            str![[r#"
Token {
    kind: Whitespace,
    span: 0..1,
}

"#]]
            .raw(),
            str!["a"].raw(),
        ),
    ];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_whitespace(&mut stream);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}

#[test]
fn test_lex_comment() {
    let cases = [
        (
            "#",
            str![[r#"
Token {
    kind: Comment,
    span: 0..1,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "# content",
            str![[r#"
Token {
    kind: Comment,
    span: 0..9,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "# content \ntrailing",
            str![[r#"
Token {
    kind: Comment,
    span: 0..10,
}

"#]]
            .raw(),
            str![[r#"

trailing
"#]]
            .raw(),
        ),
        (
            "# content \r\ntrailing",
            str![[r#"
Token {
    kind: Comment,
    span: 0..10,
}

"#]]
            .raw(),
            str![[r#"

trailing
"#]]
            .raw(),
        ),
        (
            "# content \0continue",
            str![[r#"
Token {
    kind: Comment,
    span: 0..19,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
    ];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_comment(&mut stream);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}

#[test]
fn test_lex_crlf() {
    let cases = [
        (
            "\r\ntrailing",
            str![[r#"
Token {
    kind: Newline,
    span: 0..2,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "\rtrailing",
            str![[r#"
Token {
    kind: Newline,
    span: 0..1,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
    ];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_crlf(&mut stream);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}

#[test]
fn test_lex_literal_string() {
    let cases = [
        (
            "''",
            str![[r#"
Token {
    kind: LiteralString,
    span: 0..2,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "''trailing",
            str![[r#"
Token {
    kind: LiteralString,
    span: 0..2,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "'content'trailing",
            str![[r#"
Token {
    kind: LiteralString,
    span: 0..9,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "'content",
            str![[r#"
Token {
    kind: LiteralString,
    span: 0..8,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "'content\ntrailing",
            str![[r#"
Token {
    kind: LiteralString,
    span: 0..8,
}

"#]]
            .raw(),
            str![[r#"

trailing
"#]]
            .raw(),
        ),
    ];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_literal_string(&mut stream);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}
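// The multi-line literal string cases below exercise quote counting at the
// closing delimiter: up to two extra quotes adjacent to the closing `'''`
// belong to the string's content, so runs of four or five quotes still
// terminate the string, while longer runs leave the excess in the stream.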
#[test]
fn test_lex_ml_literal_string() {
    let cases = [
        (
            "''''''",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..6,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "''''''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..6,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "'''content'''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..13,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "'''content",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..10,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "'''content'",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..11,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "'''content''",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..12,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "'''content\ntrailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..19,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "'''''''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..7,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "''''''''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..8,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "'''''''''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..8,
}

"#]]
            .raw(),
            str!["'trailing"].raw(),
        ),
        (
            "'''''content''''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..16,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "'''''content'''''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..17,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            "'''''content''''''trailing",
            str![[r#"
Token {
    kind: MlLiteralString,
    span: 0..17,
}

"#]]
            .raw(),
            str!["'trailing"].raw(),
        ),
    ];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_ml_literal_string(&mut stream);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}

#[test]
fn test_lex_basic_string() {
    let cases = [
        (
            r#""""#,
            str![[r#"
Token {
    kind: BasicString,
    span: 0..2,
}

"#]]
            .raw(),
            str![].raw(),
        ),
        (
            r#"""trailing"#,
            str![[r#"
Token {
    kind: BasicString,
    span: 0..2,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            r#""content"trailing"#,
            str![[r#"
Token {
    kind: BasicString,
    span: 0..9,
}

"#]]
            .raw(),
            str!["trailing"].raw(),
        ),
        (
            r#""content"#,
            str![[r#"
Token {
    kind: BasicString,
    span: 0..8,
}

"#]]
            .raw(),
            str![].raw(),
        ),
        (
            r#""content\ntrailing"#,
            str![[r#"
Token {
    kind: BasicString,
    span: 0..18,
}

"#]]
            .raw(),
            str![].raw(),
        ),
    ];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_basic_string(&mut stream);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}

#[test]
fn test_lex_atom() {
    let cases = [
        (
            "hello",
            str![[r#"
Token {
    kind: Atom,
    span: 0..5,
}

"#]]
            .raw(),
            str![""].raw(),
        ),
        (
            "hello = world",
            str![[r#"
Token {
    kind: Atom,
    span: 0..5,
}

"#]]
            .raw(),
            str![" = world"].raw(),
        ),
        (
            "1.100e100 ]",
            str![[r#"
Token {
    kind: Atom,
    span: 0..1,
}

"#]]
            .raw(),
            str![".100e100 ]"].raw(),
        ),
        (
            "a.b.c = 5",
            str![[r#"
Token {
    kind: Atom,
    span: 0..1,
}

"#]]
            .raw(),
            str![".b.c = 5"].raw(),
        ),
        (
            "true ]",
            str![[r#"
Token {
    kind: Atom,
    span: 0..4,
}

"#]]
            .raw(),
            str![" ]"].raw(),
        ),
    ];

    for (stream, expected_tokens, expected_stream) in cases {
        dbg!(stream);
        let mut stream = Stream::new(stream);
        let actual_tokens = lex_atom(&mut stream);
        assert_data_eq!(actual_tokens.to_debug(), expected_tokens.raw());
        let stream = *stream;
        assert_data_eq!(stream, expected_stream.raw());
    }
}
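/// Lexes `input` in full, snapshots the resulting token stream, and checks
/// the lexer's lossless invariants: the first span starts at 0, the last span
/// ends at `input.len()`, and consecutive spans are contiguous (no gaps or
/// overlaps).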
#[track_caller]
fn t(input: &str, expected: impl IntoData) {
    let source = crate::Source::new(input);
    let actual = source.lex().into_vec();
    assert_data_eq!(actual.to_debug(), expected);

    if !actual.is_empty() {
        let spans = actual.iter().map(|t| t.span()).collect::<Vec<_>>();
        assert_eq!(spans.first().unwrap().start(), 0);
        assert_eq!(spans.last().unwrap().end(), input.len());
        for i in 0..(spans.len() - 1) {
            let current = &spans[i];
            let next = &spans[i + 1];
            assert_eq!(current.end(), next.start());
        }
    }
}

#[test]
fn literal_strings() {
    t(
        "''",
        str![[r#"
[
    Token {
        kind: LiteralString,
        span: 0..2,
    },
    Token {
        kind: Eof,
        span: 2..2,
    },
]

"#]]
        .raw(),
    );
    t(
        "''''''",
        str![[r#"
[
    Token {
        kind: MlLiteralString,
        span: 0..6,
    },
    Token {
        kind: Eof,
        span: 6..6,
    },
]

"#]]
        .raw(),
    );
    t(
        "'''\n'''",
        str![[r#"
[
    Token {
        kind: MlLiteralString,
        span: 0..7,
    },
    Token {
        kind: Eof,
        span: 7..7,
    },
]

"#]]
        .raw(),
    );
    t(
        "'a'",
        str![[r#"
[
    Token {
        kind: LiteralString,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "'\"a'",
        str![[r#"
[
    Token {
        kind: LiteralString,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        "''''a'''",
        str![[r#"
[
    Token {
        kind: MlLiteralString,
        span: 0..8,
    },
    Token {
        kind: Eof,
        span: 8..8,
    },
]

"#]]
        .raw(),
    );
    t(
        "'''\n'a\n'''",
        str![[r#"
[
    Token {
        kind: MlLiteralString,
        span: 0..10,
    },
    Token {
        kind: Eof,
        span: 10..10,
    },
]

"#]]
        .raw(),
    );
    t(
        "'''a\n'a\r\n'''",
        str![[r#"
[
    Token {
        kind: MlLiteralString,
        span: 0..12,
    },
    Token {
        kind: Eof,
        span: 12..12,
    },
]

"#]]
        .raw(),
    );
}

#[test]
fn basic_strings() {
    t(
        r#""""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..2,
    },
    Token {
        kind: Eof,
        span: 2..2,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""""""""#,
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..6,
    },
    Token {
        kind: Eof,
        span: 6..6,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""a""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""""a""""#,
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..7,
    },
    Token {
        kind: Eof,
        span: 7..7,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\t""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\u0000""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..8,
    },
    Token {
        kind: Eof,
        span: 8..8,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\U00000000""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..12,
    },
    Token {
        kind: Eof,
        span: 12..12,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\U000A0000""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..12,
    },
    Token {
        kind: Eof,
        span: 12..12,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\\t""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..5,
    },
    Token {
        kind: Eof,
        span: 5..5,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\t\"",
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\"\"\n\t\"\"\"",
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..8,
    },
    Token {
        kind: Eof,
        span: 8..8,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\"\"\\\n\"\"\"",
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..8,
    },
    Token {
        kind: Eof,
        span: 8..8,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\"\"\\\n \t \t \\\r\n \t \n \t \r\n\"\"\"",
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..34,
    },
    Token {
        kind: Eof,
        span: 34..34,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\r""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\n""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\b""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""a\fa""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..6,
    },
    Token {
        kind: Eof,
        span: 6..6,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\"a""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..5,
    },
    Token {
        kind: Eof,
        span: 5..5,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\"\"\na\"\"\"",
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..8,
    },
    Token {
        kind: Eof,
        span: 8..8,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\"\"\n\"\"\"",
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..7,
    },
    Token {
        kind: Eof,
        span: 7..7,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""""a\"""b""""#,
        str![[r#"
[
    Token {
        kind: MlBasicString,
        span: 0..12,
    },
    Token {
        kind: Eof,
        span: 12..12,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\a"#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\\\n",
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..2,
    },
    Token {
        kind: Newline,
        span: 2..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\\\r\n",
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..3,
    },
    Token {
        kind: Newline,
        span: 3..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\\",
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..2,
    },
    Token {
        kind: Eof,
        span: 2..2,
    },
]

"#]]
        .raw(),
    );
    t(
        "\"\u{0}",
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..2,
    },
    Token {
        kind: Eof,
        span: 2..2,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\U00""#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..6,
    },
    Token {
        kind: Eof,
        span: 6..6,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\U00"#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..5,
    },
    Token {
        kind: Eof,
        span: 5..5,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\uD800"#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..7,
    },
    Token {
        kind: Eof,
        span: 7..7,
    },
]

"#]]
        .raw(),
    );
    t(
        r#""\UFFFFFFFF"#,
        str![[r#"
[
    Token {
        kind: BasicString,
        span: 0..11,
    },
    Token {
        kind: Eof,
        span: 11..11,
    },
]

"#]]
        .raw(),
    );
}
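// Keys and scalar-looking values are not distinguished at this stage:
// letters, digits, `-`, and `_` all lex into a single `Atom` token.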
#[test]
fn keylike() {
    t(
        "foo",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "0bar",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        "bar0",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        "1234",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..4,
    },
    Token {
        kind: Eof,
        span: 4..4,
    },
]

"#]]
        .raw(),
    );
    t(
        "a-b",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "a_B",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "-_-",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        "___",
        str![[r#"
[
    Token {
        kind: Atom,
        span: 0..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
}

#[test]
fn all() {
    t(
        " a ",
        str![[r#"
[
    Token {
        kind: Whitespace,
        span: 0..1,
    },
    Token {
        kind: Atom,
        span: 1..2,
    },
    Token {
        kind: Whitespace,
        span: 2..3,
    },
    Token {
        kind: Eof,
        span: 3..3,
    },
]

"#]]
        .raw(),
    );
    t(
        " a\t [[]] \t [] {} , . =\n# foo \r\n#foo \n ",
        str![[r#"
[
    Token {
        kind: Whitespace,
        span: 0..1,
    },
    Token {
        kind: Atom,
        span: 1..2,
    },
    Token {
        kind: Whitespace,
        span: 2..4,
    },
    Token {
        kind: LeftSquareBracket,
        span: 4..5,
    },
    Token {
        kind: LeftSquareBracket,
        span: 5..6,
    },
    Token {
        kind: RightSquareBracket,
        span: 6..7,
    },
    Token {
        kind: RightSquareBracket,
        span: 7..8,
    },
    Token {
        kind: Whitespace,
        span: 8..11,
    },
    Token {
        kind: LeftSquareBracket,
        span: 11..12,
    },
    Token {
        kind: RightSquareBracket,
        span: 12..13,
    },
    Token {
        kind: Whitespace,
        span: 13..14,
    },
    Token {
        kind: LeftCurlyBracket,
        span: 14..15,
    },
    Token {
        kind: RightCurlyBracket,
        span: 15..16,
    },
    Token {
        kind: Whitespace,
        span: 16..17,
    },
    Token {
        kind: Comma,
        span: 17..18,
    },
    Token {
        kind: Whitespace,
        span: 18..19,
    },
    Token {
        kind: Dot,
        span: 19..20,
    },
    Token {
        kind: Whitespace,
        span: 20..21,
    },
    Token {
        kind: Equals,
        span: 21..22,
    },
    Token {
        kind: Newline,
        span: 22..23,
    },
    Token {
        kind: Comment,
        span: 23..29,
    },
    Token {
        kind: Newline,
        span: 29..31,
    },
    Token {
        kind: Comment,
        span: 31..36,
    },
    Token {
        kind: Newline,
        span: 36..37,
    },
    Token {
        kind: Whitespace,
        span: 37..38,
    },
    Token {
        kind: Eof,
        span: 38..38,
    },
]

"#]]
        .raw(),
    );
}
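// The `bad` tests below feed the lexer input that is not valid TOML (a bare
// CR, unterminated strings, NUL bytes). The lexer still produces tokens
// covering the whole input; rejecting these is presumably left to later
// validation passes.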
=\n# foo \r\n#foo \n ", str![[r#" [ Token { kind: Whitespace, span: 0..1, }, Token { kind: Atom, span: 1..2, }, Token { kind: Whitespace, span: 2..4, }, Token { kind: LeftSquareBracket, span: 4..5, }, Token { kind: LeftSquareBracket, span: 5..6, }, Token { kind: RightSquareBracket, span: 6..7, }, Token { kind: RightSquareBracket, span: 7..8, }, Token { kind: Whitespace, span: 8..11, }, Token { kind: LeftSquareBracket, span: 11..12, }, Token { kind: RightSquareBracket, span: 12..13, }, Token { kind: Whitespace, span: 13..14, }, Token { kind: LeftCurlyBracket, span: 14..15, }, Token { kind: RightCurlyBracket, span: 15..16, }, Token { kind: Whitespace, span: 16..17, }, Token { kind: Comma, span: 17..18, }, Token { kind: Whitespace, span: 18..19, }, Token { kind: Dot, span: 19..20, }, Token { kind: Whitespace, span: 20..21, }, Token { kind: Equals, span: 21..22, }, Token { kind: Newline, span: 22..23, }, Token { kind: Comment, span: 23..29, }, Token { kind: Newline, span: 29..31, }, Token { kind: Comment, span: 31..36, }, Token { kind: Newline, span: 36..37, }, Token { kind: Whitespace, span: 37..38, }, Token { kind: Eof, span: 38..38, }, ] "#]] .raw(), ); } #[test] fn bare_cr_bad() { t( "\r", str![[r#" [ Token { kind: Newline, span: 0..1, }, Token { kind: Eof, span: 1..1, }, ] "#]] .raw(), ); t( "'\n", str![[r#" [ Token { kind: LiteralString, span: 0..1, }, Token { kind: Newline, span: 1..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); t( "'\u{0}", str![[r#" [ Token { kind: LiteralString, span: 0..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); t( "'", str![[r#" [ Token { kind: LiteralString, span: 0..1, }, Token { kind: Eof, span: 1..1, }, ] "#]] .raw(), ); t( "\u{0}", str![[r#" [ Token { kind: Atom, span: 0..1, }, Token { kind: Eof, span: 1..1, }, ] "#]] .raw(), ); } #[test] fn bad_comment() { t( "#\u{0}", str![[r#" [ Token { kind: Comment, span: 0..2, }, Token { kind: Eof, span: 2..2, }, ] "#]] .raw(), ); }