1#![allow(dead_code)]
4
5use std::fmt;
6
7#[derive(Debug)]
11pub(crate) struct Lexer<'s> {
12 src: &'s str,
14
15 off: usize,
17
18 level: usize,
21}
22
23#[derive(Copy, Clone, Debug, PartialEq, Eq)]
26pub(crate) struct Lexeme<'s>(pub bool, pub Token, pub usize, pub &'s str);
27
28#[derive(Debug, Clone)]
31pub struct OwnedLexeme(
32 pub(crate) bool,
33 pub(crate) Token,
34 pub(crate) usize,
35 pub(crate) String,
36);
37
38#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
39pub(crate) enum Token {
40 Arrow,
42 AArrow,
44 At,
46 Colon,
48 CColon,
50 Comma,
52 Dot,
54 Ident,
56 LAngle,
58 LBrace,
60 LBracket,
62 LLBrace,
64 LParen,
66 NumDec,
68 NumHex,
71 Pipe,
73 Pound,
75 RAngle,
77 RBrace,
79 RBracket,
81 RParen,
83 RRBrace,
85 String,
88 Text,
90
91 Unexpected,
93}
94
95impl<'s> Lexer<'s> {
96 pub(crate) fn new(src: &'s str) -> Self {
97 Self {
98 src,
99 off: 0,
100 level: 0,
101 }
102 }
103
104 fn next_text_token(&mut self) -> Option<Lexeme<'s>> {
106 let bytes = self.src.as_bytes();
107
108 use Token as T;
109 Some(match bytes.first()? {
110 b'{' if bytes.get(1) == Some(&b'{') => {
111 self.advance(1);
112 self.take(false, T::LLBrace, 1)
113 }
114
115 b'{' => {
116 self.level += 1;
117 self.take(false, T::LBrace, 1)
118 }
119
120 b'}' if bytes.get(1) == Some(&b'}') => {
121 self.advance(1);
122 self.take(false, T::RRBrace, 1)
123 }
124
125 b'}' => self.take(false, T::RBrace, 1),
130
131 _ => self.take_until(false, T::Text, |b| b"{}".contains(&b)),
132 })
133 }
134
135 fn next_expr_token(&mut self) -> Option<Lexeme<'s>> {
137 let ws = self.take_whitespace();
138 let bytes = self.src.as_bytes();
139
140 use Token as T;
141 Some(match bytes.first()? {
142 b'-' if bytes.get(1) == Some(&b'>') => self.take(ws, T::Arrow, 2),
143
144 b'=' if bytes.get(1) == Some(&b'>') => self.take(ws, T::AArrow, 2),
145
146 b'@' => self.take(ws, T::At, 1),
147
148 b':' if bytes.get(1) == Some(&b':') => self.take(ws, T::CColon, 2),
149
150 b':' => self.take(ws, T::Colon, 1),
151
152 b',' => self.take(ws, T::Comma, 1),
153
154 b'.' => self.take(ws, T::Dot, 1),
155
156 b'0' if bytes.get(1) == Some(&b'x')
157 && bytes.get(2).is_some_and(|b| is_valid_hex_byte(*b)) =>
158 {
159 self.advance(2);
160 self.take_until(ws, T::NumHex, |c| !is_valid_hex_byte(c))
161 }
162
163 b'0'..=b'9' => self.take_until(ws, T::NumDec, |c| !is_valid_decimal_byte(c)),
164
165 b'a'..=b'z' | b'A'..=b'Z' | b'_' => {
166 self.take_until(ws, T::Ident, |c| !is_valid_identifier_byte(c))
167 }
168
169 b'<' => self.take(ws, T::LAngle, 1),
170
171 b'{' => {
172 self.level += 1;
173 self.take(ws, T::LBrace, 1)
174 }
175
176 b'[' => self.take(ws, T::LBracket, 1),
177
178 b'(' => self.take(ws, T::LParen, 1),
179
180 b'|' => self.take(ws, T::Pipe, 1),
181
182 b'#' => self.take(ws, T::Pound, 1),
183
184 b'>' => self.take(ws, T::RAngle, 1),
185
186 b'}' => {
187 self.level -= 1;
188 self.take(ws, T::RBrace, 1)
189 }
190
191 b']' => self.take(ws, T::RBracket, 1),
192
193 b')' => self.take(ws, T::RParen, 1),
194
195 b'\'' => {
196 let mut escaped = true;
199 for (i, b) in self.src.bytes().enumerate() {
200 if escaped {
201 escaped = false;
202 } else if b == b'\\' {
203 escaped = true;
204 } else if b == b'\'' {
205 self.advance(1);
206 let content = self.take(ws, T::String, i - 1);
207 self.advance(1);
208 return Some(content);
209 }
210 }
211
212 self.take(ws, T::Unexpected, self.src.len())
215 }
216
217 _ => {
220 let next_boundary = (1..=self.src.len())
221 .find(|&i| self.src.is_char_boundary(i))
222 .unwrap_or(self.src.len());
223 self.take(ws, T::Unexpected, next_boundary)
224 }
225 })
226 }
227
228 fn take_whitespace(&mut self) -> bool {
231 let Lexeme(_, _, _, slice) =
232 self.take_until(false, Token::Unexpected, |b| !b.is_ascii_whitespace());
233 !slice.is_empty()
234 }
235
236 fn take_until(&mut self, ws: bool, t: Token, p: impl FnMut(u8) -> bool) -> Lexeme<'s> {
240 let n = self.src.bytes().position(p).unwrap_or(self.src.len());
241 self.take(ws, t, n)
242 }
243
244 fn take(&mut self, ws: bool, t: Token, n: usize) -> Lexeme<'s> {
251 let start = self.off;
252 let slice = &self.src[..n];
253 self.advance(n);
254
255 Lexeme(ws, t, start, slice)
256 }
257
258 fn advance(&mut self, n: usize) {
265 self.src = &self.src[n..];
266 self.off += n;
267 }
268}
269
270impl Lexeme<'_> {
271 pub(crate) fn detach(&self) -> OwnedLexeme {
273 OwnedLexeme(self.0, self.1, self.2, self.3.to_owned())
274 }
275}
276
277impl<'s> Iterator for Lexer<'s> {
278 type Item = Lexeme<'s>;
279
280 fn next(&mut self) -> Option<Self::Item> {
281 if self.level == 0 {
282 self.next_text_token()
283 } else {
284 self.next_expr_token()
285 }
286 }
287}
288
289impl fmt::Display for OwnedLexeme {
290 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
291 use OwnedLexeme as L;
292 use Token as T;
293
294 if self.0 {
295 write!(f, "whitespace followed by ")?;
296 }
297
298 match self {
299 L(_, T::Arrow, _, _) => write!(f, "'->'"),
300 L(_, T::AArrow, _, _) => write!(f, "'=>'"),
301 L(_, T::At, _, _) => write!(f, "'@'"),
302 L(_, T::Colon, _, _) => write!(f, "':'"),
303 L(_, T::CColon, _, _) => write!(f, "'::'"),
304 L(_, T::Comma, _, _) => write!(f, "','"),
305 L(_, T::Dot, _, _) => write!(f, "'.'"),
306 L(_, T::Ident, _, s) => write!(f, "identifier {s:?}"),
307 L(_, T::LAngle, _, _) => write!(f, "'<'"),
308 L(_, T::LBrace, _, _) => write!(f, "'{{'"),
309 L(_, T::LBracket, _, _) => write!(f, "'['"),
310 L(_, T::LLBrace, _, _) => write!(f, "'{{{{'"),
311 L(_, T::LParen, _, _) => write!(f, "'('"),
312 L(_, T::NumDec, _, s) => write!(f, "decimal number {s:?}"),
313 L(_, T::NumHex, _, s) => write!(f, "hexadecimal number {s:?}"),
314 L(_, T::Pipe, _, _) => write!(f, "'|'"),
315 L(_, T::Pound, _, _) => write!(f, "'#'"),
316 L(_, T::RAngle, _, _) => write!(f, "'>'"),
317 L(_, T::RBrace, _, _) => write!(f, "'}}'"),
318 L(_, T::RBracket, _, _) => write!(f, "']'"),
319 L(_, T::RParen, _, _) => write!(f, "')'"),
320 L(_, T::RRBrace, _, _) => write!(f, "'}}}}'"),
321 L(_, T::String, _, s) => write!(f, "string {s:?}"),
322 L(_, T::Text, _, s) => write!(f, "text {s:?}"),
323 L(_, T::Unexpected, _, s) => {
324 write!(f, "\"")?;
325 for b in s.bytes() {
326 match b {
327 b'"' => write!(f, "\\\"")?,
328 b'\\' => write!(f, "\\\\")?,
329 b'\n' => write!(f, "\\n")?,
330 b'\t' => write!(f, "\\t")?,
331 b'\r' => write!(f, "\\r")?,
332 b if b.is_ascii_graphic() || b == b' ' => write!(f, "{}", b as char)?,
333 b => write!(f, "\\x{:02X}", b)?,
334 }
335 }
336 write!(f, "\"")
337 }
338 }?;
339
340 write!(f, " at offset {}", self.2)
341 }
342}
343
344impl fmt::Display for Token {
345 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
346 use Token as T;
347 match self {
348 T::Arrow => write!(f, "'->'"),
349 T::AArrow => write!(f, "'=>'"),
350 T::At => write!(f, "'@'"),
351 T::Colon => write!(f, "':'"),
352 T::CColon => write!(f, "'::'"),
353 T::Comma => write!(f, "','"),
354 T::Dot => write!(f, "'.'"),
355 T::Ident => write!(f, "an identifier"),
356 T::LAngle => write!(f, "'<'"),
357 T::LBrace => write!(f, "'{{'"),
358 T::LBracket => write!(f, "'['"),
359 T::LLBrace => write!(f, "'{{{{'"),
360 T::LParen => write!(f, "'('"),
361 T::NumDec => write!(f, "a decimal number"),
362 T::NumHex => write!(f, "a hexadecimal number"),
363 T::Pipe => write!(f, "'|'"),
364 T::Pound => write!(f, "'#'"),
365 T::RAngle => write!(f, "'>'"),
366 T::RBrace => write!(f, "'}}'"),
367 T::RBracket => write!(f, "']'"),
368 T::RParen => write!(f, "')'"),
369 T::RRBrace => write!(f, "'}}}}'"),
370 T::String => write!(f, "a string"),
371 T::Text => write!(f, "text"),
372 T::Unexpected => write!(f, "unexpected input"),
373 }
374 }
375}
376
377fn is_valid_identifier_byte(b: u8) -> bool {
378 matches!(b, b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9')
379}
380
381fn is_valid_hex_byte(b: u8) -> bool {
382 matches!(b, b'_' | b'a'..=b'f' | b'A'..=b'F' | b'0'..=b'9')
383}
384
385fn is_valid_decimal_byte(b: u8) -> bool {
386 matches!(b, b'_' | b'0'..=b'9')
387}
388
389#[cfg(test)]
390mod tests {
391 use super::*;
392 use Lexeme as L;
393 use insta::assert_snapshot;
394
395 fn lexemes(src: &str) -> String {
396 Lexer::new(src)
397 .map(|L(ws, t, o, s)| {
398 let safe_s: String = s
400 .bytes()
401 .map(|b| match b {
402 b'"' => "\\\"".to_string(),
403 b'\\' => "\\\\".to_string(),
404 b'\n' => "\\n".to_string(),
405 b'\t' => "\\t".to_string(),
406 b'\r' => "\\r".to_string(),
407 b if b.is_ascii_graphic() || b == b' ' => (b as char).to_string(),
408 b => format!("\\x{:02X}", b),
409 })
410 .collect();
411 format!("L({ws:?}, {t:?}, {o:?}, \"{}\")", safe_s)
412 })
413 .collect::<Vec<_>>()
414 .join("\n")
415 }
416
417 #[test]
419 fn test_all_text() {
420 assert_snapshot!(lexemes("foo bar"), @r###"L(false, Text, 0, "foo bar")"###);
421 }
422
423 #[test]
425 fn test_escapes() {
426 assert_snapshot!(lexemes(r#"foo {{bar}}"#), @r###"
427 L(false, Text, 0, "foo ")
428 L(false, LLBrace, 5, "{")
429 L(false, Text, 6, "bar")
430 L(false, RRBrace, 10, "}")
431 "###);
432 }
433
434 #[test]
436 fn test_expressions() {
437 assert_snapshot!(lexemes(r#"foo {bar}"#), @r###"
438 L(false, Text, 0, "foo ")
439 L(false, LBrace, 4, "{")
440 L(false, Ident, 5, "bar")
441 L(false, RBrace, 8, "}")
442 "###);
443 }
444
445 #[test]
447 fn test_expression_whitespace() {
448 assert_snapshot!(lexemes(r#"foo { bar }"#), @r###"
449 L(false, Text, 0, "foo ")
450 L(false, LBrace, 4, "{")
451 L(true, Ident, 7, "bar")
452 L(true, RBrace, 13, "}")
453 "###);
454 }
455
456 #[test]
458 fn test_expression_dots() {
459 assert_snapshot!(lexemes(r#"foo {bar. baz . qux}"#), @r###"
460 L(false, Text, 0, "foo ")
461 L(false, LBrace, 4, "{")
462 L(false, Ident, 5, "bar")
463 L(false, Dot, 8, ".")
464 L(true, Ident, 10, "baz")
465 L(true, Dot, 15, ".")
466 L(true, Ident, 17, "qux")
467 L(false, RBrace, 20, "}")
468 "###);
469 }
470
471 #[test]
473 fn test_multiple_expressions() {
474 assert_snapshot!(lexemes(r#"foo {bar.baz} qux {quy.quz}"#), @r###"
475 L(false, Text, 0, "foo ")
476 L(false, LBrace, 4, "{")
477 L(false, Ident, 5, "bar")
478 L(false, Dot, 8, ".")
479 L(false, Ident, 9, "baz")
480 L(false, RBrace, 12, "}")
481 L(false, Text, 13, " qux ")
482 L(false, LBrace, 18, "{")
483 L(false, Ident, 19, "quy")
484 L(false, Dot, 22, ".")
485 L(false, Ident, 23, "quz")
486 L(false, RBrace, 26, "}")
487 "###);
488 }
489
490 #[test]
493 fn test_nested_curlies() {
494 assert_snapshot!(lexemes(r#"foo {bar {baz} qux}"#), @r###"
495 L(false, Text, 0, "foo ")
496 L(false, LBrace, 4, "{")
497 L(false, Ident, 5, "bar")
498 L(true, LBrace, 9, "{")
499 L(false, Ident, 10, "baz")
500 L(false, RBrace, 13, "}")
501 L(true, Ident, 15, "qux")
502 L(false, RBrace, 18, "}")
503 "###);
504 }
505
506 #[test]
508 fn test_unbalanced_curlies() {
509 assert_snapshot!(lexemes(r#"foo}{bar{}}"#), @r###"
510 L(false, Text, 0, "foo")
511 L(false, RBrace, 3, "}")
512 L(false, LBrace, 4, "{")
513 L(false, Ident, 5, "bar")
514 L(false, LBrace, 8, "{")
515 L(false, RBrace, 9, "}")
516 L(false, RBrace, 10, "}")
517 "###);
518 }
519
520 #[test]
522 fn test_unexpected_characters() {
523 assert_snapshot!(lexemes(r#"anything goes {? % ! 🔥}"#), @r###"
524 L(false, Text, 0, "anything goes ")
525 L(false, LBrace, 14, "{")
526 L(false, Unexpected, 15, "?")
527 L(true, Unexpected, 17, "%")
528 L(true, Unexpected, 19, "!")
529 L(true, Unexpected, 21, "\xF0\x9F\x94\xA5")
530 L(false, RBrace, 25, "}")
531 "###);
532 }
533
534 #[test]
538 fn test_triple_curlies() {
539 assert_snapshot!(lexemes(r#"foo {{{bar} {baz}}} }}} { {{ } qux"#), @r###"
540 L(false, Text, 0, "foo ")
541 L(false, LLBrace, 5, "{")
542 L(false, LBrace, 6, "{")
543 L(false, Ident, 7, "bar")
544 L(false, RBrace, 10, "}")
545 L(false, Text, 11, " ")
546 L(false, LBrace, 12, "{")
547 L(false, Ident, 13, "baz")
548 L(false, RBrace, 16, "}")
549 L(false, RRBrace, 18, "}")
550 L(false, Text, 19, " ")
551 L(false, RRBrace, 21, "}")
552 L(false, RBrace, 22, "}")
553 L(false, Text, 23, " ")
554 L(false, LBrace, 24, "{")
555 L(true, LBrace, 26, "{")
556 L(false, LBrace, 27, "{")
557 L(true, RBrace, 29, "}")
558 L(true, Ident, 31, "qux")
559 "###);
560 }
561
562 #[test]
565 fn test_alternates() {
566 assert_snapshot!(lexemes(r#"foo | {bar | baz.qux} | quy"#), @r###"
567 L(false, Text, 0, "foo | ")
568 L(false, LBrace, 6, "{")
569 L(false, Ident, 7, "bar")
570 L(true, Pipe, 11, "|")
571 L(true, Ident, 13, "baz")
572 L(false, Dot, 16, ".")
573 L(false, Ident, 17, "qux")
574 L(false, RBrace, 20, "}")
575 L(false, Text, 21, " | quy")
576 "###);
577 }
578
579 #[test]
582 fn test_indices() {
583 assert_snapshot!(lexemes(r#"foo {bar[baz].qux=>[quy]->[quz]}"#), @r###"
584 L(false, Text, 0, "foo ")
585 L(false, LBrace, 4, "{")
586 L(false, Ident, 5, "bar")
587 L(false, LBracket, 8, "[")
588 L(false, Ident, 9, "baz")
589 L(false, RBracket, 12, "]")
590 L(false, Dot, 13, ".")
591 L(false, Ident, 14, "qux")
592 L(false, AArrow, 17, "=>")
593 L(false, LBracket, 19, "[")
594 L(false, Ident, 20, "quy")
595 L(false, RBracket, 23, "]")
596 L(false, Arrow, 24, "->")
597 L(false, LBracket, 26, "[")
598 L(false, Ident, 27, "quz")
599 L(false, RBracket, 30, "]")
600 L(false, RBrace, 31, "}")
601 "###);
602 }
603
604 #[test]
606 fn test_numeric_literals() {
607 assert_snapshot!(lexemes(r#"{123 0x123 def 0xdef}"#), @r###"
608 L(false, LBrace, 0, "{")
609 L(false, NumDec, 1, "123")
610 L(true, NumHex, 7, "123")
611 L(true, Ident, 11, "def")
612 L(true, NumHex, 17, "def")
613 L(false, RBrace, 20, "}")
614 "###);
615 }
616
617 #[test]
621 fn test_numeric_literal_underscores() {
622 assert_snapshot!(lexemes(r#"{123_456 0x12_ab_de _123}"#), @r###"
623 L(false, LBrace, 0, "{")
624 L(false, NumDec, 1, "123_456")
625 L(true, NumHex, 11, "12_ab_de")
626 L(true, Ident, 20, "_123")
627 L(false, RBrace, 24, "}")
628 "###);
629 }
630
631 #[test]
634 fn test_address_literals() {
635 assert_snapshot!(lexemes(r#"{@123 @0x123}"#), @r###"
636 L(false, LBrace, 0, "{")
637 L(false, At, 1, "@")
638 L(false, NumDec, 2, "123")
639 L(true, At, 6, "@")
640 L(false, NumHex, 9, "123")
641 L(false, RBrace, 12, "}")
642 "###);
643 }
644
645 #[test]
647 fn test_incomplete_hexadecimal() {
648 assert_snapshot!(lexemes(r#"{0x}"#), @r###"
649 L(false, LBrace, 0, "{")
650 L(false, NumDec, 1, "0")
651 L(false, Ident, 2, "x")
652 L(false, RBrace, 3, "}")
653 "###);
654 }
655
656 #[test]
659 fn test_vector_literals() {
660 assert_snapshot!(lexemes(r#"{vector[1, 2, 3] vector<u32> vector[4u64]}"#), @r###"
661 L(false, LBrace, 0, "{")
662 L(false, Ident, 1, "vector")
663 L(false, LBracket, 7, "[")
664 L(false, NumDec, 8, "1")
665 L(false, Comma, 9, ",")
666 L(true, NumDec, 11, "2")
667 L(false, Comma, 12, ",")
668 L(true, NumDec, 14, "3")
669 L(false, RBracket, 15, "]")
670 L(true, Ident, 17, "vector")
671 L(false, LAngle, 23, "<")
672 L(false, Ident, 24, "u32")
673 L(false, RAngle, 27, ">")
674 L(true, Ident, 29, "vector")
675 L(false, LBracket, 35, "[")
676 L(false, NumDec, 36, "4")
677 L(false, Ident, 37, "u64")
678 L(false, RBracket, 40, "]")
679 L(false, RBrace, 41, "}")
680 "###);
681 }
682
683 #[test]
685 fn test_types() {
686 assert_snapshot!(lexemes(r#"{0x2::table::Table<address, 0x2::coin::Coin<0x2::sui::SUI>>}"#), @r###"
687 L(false, LBrace, 0, "{")
688 L(false, NumHex, 3, "2")
689 L(false, CColon, 4, "::")
690 L(false, Ident, 6, "table")
691 L(false, CColon, 11, "::")
692 L(false, Ident, 13, "Table")
693 L(false, LAngle, 18, "<")
694 L(false, Ident, 19, "address")
695 L(false, Comma, 26, ",")
696 L(true, NumHex, 30, "2")
697 L(false, CColon, 31, "::")
698 L(false, Ident, 33, "coin")
699 L(false, CColon, 37, "::")
700 L(false, Ident, 39, "Coin")
701 L(false, LAngle, 43, "<")
702 L(false, NumHex, 46, "2")
703 L(false, CColon, 47, "::")
704 L(false, Ident, 49, "sui")
705 L(false, CColon, 52, "::")
706 L(false, Ident, 54, "SUI")
707 L(false, RAngle, 57, ">")
708 L(false, RAngle, 58, ">")
709 L(false, RBrace, 59, "}")
710 "###);
711 }
712
713 #[test]
716 fn test_positional_struct_literals() {
717 assert_snapshot!(lexemes(r#"{0x2::balance::Balance<0x2::sui::SUI>(42u64)}"#), @r###"
718 L(false, LBrace, 0, "{")
719 L(false, NumHex, 3, "2")
720 L(false, CColon, 4, "::")
721 L(false, Ident, 6, "balance")
722 L(false, CColon, 13, "::")
723 L(false, Ident, 15, "Balance")
724 L(false, LAngle, 22, "<")
725 L(false, NumHex, 25, "2")
726 L(false, CColon, 26, "::")
727 L(false, Ident, 28, "sui")
728 L(false, CColon, 31, "::")
729 L(false, Ident, 33, "SUI")
730 L(false, RAngle, 36, ">")
731 L(false, LParen, 37, "(")
732 L(false, NumDec, 38, "42")
733 L(false, Ident, 40, "u64")
734 L(false, RParen, 43, ")")
735 L(false, RBrace, 44, "}")
736 "###);
737 }
738
739 #[test]
742 fn test_struct_literals() {
743 assert_snapshot!(lexemes(r#"{0x2::coin::Coin<0x2::sui::SUI> { id: @0x123, value: 42u64 }}"#), @r###"
744 L(false, LBrace, 0, "{")
745 L(false, NumHex, 3, "2")
746 L(false, CColon, 4, "::")
747 L(false, Ident, 6, "coin")
748 L(false, CColon, 10, "::")
749 L(false, Ident, 12, "Coin")
750 L(false, LAngle, 16, "<")
751 L(false, NumHex, 19, "2")
752 L(false, CColon, 20, "::")
753 L(false, Ident, 22, "sui")
754 L(false, CColon, 25, "::")
755 L(false, Ident, 27, "SUI")
756 L(false, RAngle, 30, ">")
757 L(true, LBrace, 32, "{")
758 L(true, Ident, 34, "id")
759 L(false, Colon, 36, ":")
760 L(true, At, 38, "@")
761 L(false, NumHex, 41, "123")
762 L(false, Comma, 44, ",")
763 L(true, Ident, 46, "value")
764 L(false, Colon, 51, ":")
765 L(true, NumDec, 53, "42")
766 L(false, Ident, 55, "u64")
767 L(true, RBrace, 59, "}")
768 L(false, RBrace, 60, "}")
769 "###);
770 }
771
772 #[test]
776 fn test_enum_literals() {
777 assert_snapshot!(lexemes(r#"{0x2::option::Option<u64>::1(42) 0x2::option::Option<u64>::Some#1(43)}"#), @r###"
778 L(false, LBrace, 0, "{")
779 L(false, NumHex, 3, "2")
780 L(false, CColon, 4, "::")
781 L(false, Ident, 6, "option")
782 L(false, CColon, 12, "::")
783 L(false, Ident, 14, "Option")
784 L(false, LAngle, 20, "<")
785 L(false, Ident, 21, "u64")
786 L(false, RAngle, 24, ">")
787 L(false, CColon, 25, "::")
788 L(false, NumDec, 27, "1")
789 L(false, LParen, 28, "(")
790 L(false, NumDec, 29, "42")
791 L(false, RParen, 31, ")")
792 L(true, NumHex, 35, "2")
793 L(false, CColon, 36, "::")
794 L(false, Ident, 38, "option")
795 L(false, CColon, 44, "::")
796 L(false, Ident, 46, "Option")
797 L(false, LAngle, 52, "<")
798 L(false, Ident, 53, "u64")
799 L(false, RAngle, 56, ">")
800 L(false, CColon, 57, "::")
801 L(false, Ident, 59, "Some")
802 L(false, Pound, 63, "#")
803 L(false, NumDec, 64, "1")
804 L(false, LParen, 65, "(")
805 L(false, NumDec, 66, "43")
806 L(false, RParen, 68, ")")
807 L(false, RBrace, 69, "}")
808 "###);
809 }
810
811 #[test]
813 fn string_literals() {
814 assert_snapshot!(lexemes(r#"{x'0f00' b'bar' 'baz'}"#), @r###"
815 L(false, LBrace, 0, "{")
816 L(false, Ident, 1, "x")
817 L(false, String, 3, "0f00")
818 L(true, Ident, 9, "b")
819 L(false, String, 11, "bar")
820 L(true, String, 17, "baz")
821 L(false, RBrace, 21, "}")
822 "###);
823 }
824
825 #[test]
828 fn test_string_literal_escapes() {
829 assert_snapshot!(lexemes(r#"{'\' \x \\'}"#), @r###"
830 L(false, LBrace, 0, "{")
831 L(false, String, 2, "\\' \\x \\\\")
832 L(false, RBrace, 11, "}")
833 "###);
834 }
835
836 #[test]
839 fn test_string_literal_trailing() {
840 assert_snapshot!(lexemes(r#"{'foo bar}"#), @r###"
841 L(false, LBrace, 0, "{")
842 L(false, Unexpected, 1, "'foo bar}")
843 "###);
844 }
845
846 #[test]
848 fn test_unexpected_single_byte() {
849 assert_snapshot!(lexemes("{$hello}"), @r###"
850 L(false, LBrace, 0, "{")
851 L(false, Unexpected, 1, "$")
852 L(false, Ident, 2, "hello")
853 L(false, RBrace, 7, "}")
854 "###);
855 }
856
857 #[test]
859 fn test_unexpected_before_multibyte() {
860 assert_snapshot!(lexemes("{$é}"), @r###"
861 L(false, LBrace, 0, "{")
862 L(false, Unexpected, 1, "$")
863 L(false, Unexpected, 2, "\xC3\xA9")
864 L(false, RBrace, 4, "}")
865 "###);
866 }
867
868 #[test]
870 fn test_unexpected_characters_utf8_safe() {
871 assert_snapshot!(lexemes("{$∑∞}"), @r###"
872 L(false, LBrace, 0, "{")
873 L(false, Unexpected, 1, "$")
874 L(false, Unexpected, 2, "\xE2\x88\x91")
875 L(false, Unexpected, 5, "\xE2\x88\x9E")
876 L(false, RBrace, 8, "}")
877 "###);
878 }
879
880 #[test]
883 fn test_ascii_whitespace_only() {
884 assert_snapshot!(lexemes("{ \t\n\u{00A0}hello}"), @r###"
885 L(false, LBrace, 0, "{")
886 L(true, Unexpected, 4, "\xC2\xA0")
887 L(false, Ident, 6, "hello")
888 L(false, RBrace, 11, "}")
889 "###);
890 }
891
892 #[test]
895 fn test_incomplete_utf8_boundary_fallback() {
896 let mut input = vec![b'{'];
898 input.push(0xC3); let input_str = unsafe { std::str::from_utf8_unchecked(&input) };
900
901 assert_snapshot!(lexemes(input_str), @r###"
903 L(false, LBrace, 0, "{")
904 L(false, Unexpected, 1, "\xC3")
905 "###);
906 }
907}