sui_sql_macro/
lexer.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::fmt;
5
/// Lexer for SQL format strings. A format string can contain regular text, or binders surrounded
/// by curly braces. Curly braces are escaped by doubling them up.
///
/// The lexer is consumed through its `Iterator` implementation, which yields one `Lexeme` at a
/// time.
pub(crate) struct Lexer<'s> {
    /// The part of the source string that has not been lexed yet. It shrinks from the front as
    /// lexemes are produced.
    src: &'s str,
    /// Number of bytes consumed so far, i.e. the byte offset of `src` within the string the lexer
    /// was created with.
    off: usize,
}
12
/// A lexeme is a token along with its offset in the source string, and the slice of source string
/// that it originated from.
///
/// The fields are positional:
/// - `.0` is the kind of token,
/// - `.1` is the byte offset of the lexeme from the start of the source string,
/// - `.2` is the slice of the source string covered by the lexeme.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct Lexeme<'s>(pub Token, pub usize, pub &'s str);
17
/// The kinds of token that the lexer distinguishes in a format string.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum Token {
    /// A single opening curly brace: '{'
    LCurl,
    /// A single closing curly brace: '}'
    RCurl,
    /// Any other text: a run of characters that contains no curly braces.
    Text,
}
27
28impl<'s> Lexer<'s> {
29    pub(crate) fn new(src: &'s str) -> Self {
30        Self { src, off: 0 }
31    }
32}
33
34impl<'s> Iterator for Lexer<'s> {
35    type Item = Lexeme<'s>;
36
37    fn next(&mut self) -> Option<Self::Item> {
38        let off = self.off;
39        let bytes = self.src.as_bytes();
40        let fst = bytes.first()?;
41
42        Some(match fst {
43            b'{' => {
44                let span = &self.src[..1];
45                self.src = &self.src[1..];
46                self.off += 1;
47                Lexeme(Token::LCurl, off, span)
48            }
49
50            b'}' => {
51                let span = &self.src[..1];
52                self.src = &self.src[1..];
53                self.off += 1;
54                Lexeme(Token::RCurl, off, span)
55            }
56
57            _ => {
58                let end = self.src.find(['{', '}']).unwrap_or(self.src.len());
59                let span = &self.src[..end];
60                self.src = &self.src[end..];
61                self.off += end;
62                Lexeme(Token::Text, off, span)
63            }
64        })
65    }
66}
67
68impl fmt::Display for Token {
69    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
70        use Token as T;
71        match self {
72            T::LCurl => write!(f, "'{{'"),
73            T::RCurl => write!(f, "'}}'"),
74            T::Text => write!(f, "text"),
75        }
76    }
77}
78
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the lexer to completion over `src` and gathers every lexeme it yields.
    fn lex(src: &str) -> Vec<Lexeme<'_>> {
        Lexer::new(src).collect()
    }

    /// The expected lexeme for a run of plain text `span` starting at byte offset `off`.
    fn text(off: usize, span: &str) -> Lexeme<'_> {
        Lexeme(Token::Text, off, span)
    }

    /// The expected lexeme for an opening curly brace at byte offset `off`.
    fn lcurl(off: usize) -> Lexeme<'static> {
        Lexeme(Token::LCurl, off, "{")
    }

    /// The expected lexeme for a closing curly brace at byte offset `off`.
    fn rcurl(off: usize) -> Lexeme<'static> {
        Lexeme(Token::RCurl, off, "}")
    }

    /// Source material without any curly braces comes back as a single text lexeme.
    #[test]
    fn test_all_text() {
        assert_eq!(lex("foo bar"), vec![text(0, "foo bar")]);
    }

    /// Curly braces in the source material split the surrounding text, with each brace getting
    /// a token of its own.
    #[test]
    fn test_curlies() {
        let expected = vec![
            text(0, "foo "),
            lcurl(4),
            text(5, "bar"),
            rcurl(8),
            text(9, " baz"),
        ];
        assert_eq!(lex("foo {bar} baz"), expected);
    }

    /// Doubled curly braces are how braces get escaped; the lexer still reports each brace
    /// individually.
    #[test]
    fn test_escape_curlies() {
        let expected = vec![
            text(0, "foo "),
            lcurl(4),
            lcurl(5),
            text(6, "bar"),
            rcurl(9),
            rcurl(10),
            text(11, " baz"),
        ];
        assert_eq!(lex("foo {{bar}} baz"), expected);
    }

    /// Each curly brace is given its own token so that the parser can parse this as an escaped
    /// opening curly followed by an empty binder, followed by a literal closing curly. If the
    /// lexer was responsible for detecting escaped curlies, it would eagerly detect the escaped
    /// closing curly and then the closing curly for the binder.
    #[test]
    fn test_combination_curlies() {
        let expected = vec![lcurl(0), lcurl(1), lcurl(2), rcurl(3), rcurl(4), rcurl(5)];
        assert_eq!(lex("{{{}}}"), expected);
    }
}