sui_display/v1/
parser.rs

1// Copyright (c) Mysten Labs, Inc.
2// SPDX-License-Identifier: Apache-2.0
3
4use std::borrow::Cow;
5use std::fmt;
6use std::iter::Peekable;
7
8use move_core_types::annotated_extractor::Element;
9use move_core_types::identifier;
10
11use crate::v1::lexer::Lexeme as L;
12use crate::v1::lexer::Lexer;
13use crate::v1::lexer::OwnedLexeme;
14use crate::v1::lexer::Token as T;
15use crate::v1::lexer::TokenSet;
16
17/// A strand is a single component of a format string, it can either be a piece of literal text
18/// that needs to be preserved in the output, or a reference to a nested field (as a sequence of
19/// field accesses) in the object being displayed which will need to be fetched and interpolated.
20#[derive(Debug, Eq, PartialEq)]
21pub enum Strand<'s> {
22    Text(Cow<'s, str>),
23    Expr(Vec<Element<'s>>),
24}
25
26pub(crate) struct Parser<'s> {
27    max_depth: usize,
28    lexer: Peekable<Lexer<'s>>,
29}
30
31#[derive(thiserror::Error, Debug)]
32pub enum Error {
33    #[error("Invalid identifier {ident:?} at offset {off}")]
34    InvalidIdentifier { ident: String, off: usize },
35
36    #[error("Field access at offset {off} deeper than the maximum of {max}")]
37    TooDeep { max: usize, off: usize },
38
39    #[error("Unexpected end-of-string, expected {expect}")]
40    UnexpectedEos { expect: TokenSet<'static> },
41
42    #[error("Unexpected {actual}, expected {expect}")]
43    UnexpectedToken {
44        actual: OwnedLexeme,
45        expect: TokenSet<'static>,
46    },
47}
48
/// Dispatch on the lexer's next token while leaving it in the stream. Each arm names one or more
/// token kinds plus patterns for the token's offset and slice, and evaluates to the arm's
/// expression when the peeked token is of one of those kinds.
///
/// If the stream is exhausted, or the peeked token matches none of the arms, the macro returns
/// from the *enclosing function* with an error listing every token kind mentioned by any arm, in
/// the order the arms were written.
macro_rules! match_token {
    ($tokens:expr; $(L($($kind:path)|+, $at:pat, $text:pat) => $body:expr),+ $(,)?) => {{
        // The union of the token kinds accepted by the arms, reported verbatim on failure.
        const ACCEPTED: TokenSet = TokenSet(&[$($($kind),+),+]);

        // Copy the lexeme out of the lookahead slot so that the arms are free to advance the
        // lexer themselves.
        let Some(&upcoming) = $tokens.peek() else {
            return Err(Error::UnexpectedEos { expect: ACCEPTED });
        };

        match upcoming {
            $(L($($kind)|+, $at, $text) => $body,)+
            unexpected => return Err(Error::UnexpectedToken {
                actual: unexpected.detach(),
                expect: ACCEPTED,
            }),
        }
    }};
}
65
66/// Recursive descent parser for Display V1 format strings, parsing the following grammar:
67///
68///   format ::= strand*
69///   strand ::= text | expr
70///   text   ::= part+
71///   part   ::= TEXT | ESCAPED
72///   expr   ::= '{' IDENT ('.' IDENT)* '}'
73///
74/// The grammar has a lookahead of one token, and requires no backtracking.
75impl<'s> Parser<'s> {
76    /// Construct a new parser, consuming input from the `src` string. `max_depth` controls how
77    /// deeply nested a field access expression can be before it is considered an error.
78    pub(crate) fn new(max_depth: usize, src: &'s str) -> Self {
79        Self {
80            max_depth,
81            lexer: Lexer::new(src).peekable(),
82        }
83    }
84
85    /// Entrypoint into the parser, parsing the root non-terminal -- `format`. Consumes all the
86    /// remaining input in the parser and the parser itself.
87    pub(crate) fn parse_format(mut self) -> Result<Vec<Strand<'s>>, Error> {
88        let mut strands = vec![];
89        while self.lexer.peek().is_some() {
90            strands.push(self.parse_strand()?);
91        }
92
93        Ok(strands)
94    }
95
96    fn parse_strand(&mut self) -> Result<Strand<'s>, Error> {
97        Ok(match_token! { self.lexer;
98            L(T::Text | T::Escaped, _, _) => Strand::Text(self.parse_text()?),
99            L(T::LCurl, _, _) => Strand::Expr(self.parse_expr()?),
100        })
101    }
102
103    fn parse_text(&mut self) -> Result<Cow<'s, str>, Error> {
104        let mut text = self.parse_part()?;
105        while let Some(L(T::Text | T::Escaped, _, _)) = self.lexer.peek() {
106            text += self.parse_part()?;
107        }
108
109        Ok(text)
110    }
111
112    fn parse_part(&mut self) -> Result<Cow<'s, str>, Error> {
113        Ok(match_token! { self.lexer;
114            L(T::Text | T::Escaped, _, slice) => {
115                self.lexer.next();
116                Cow::Borrowed(slice)
117            }
118        })
119    }
120
121    fn parse_expr(&mut self) -> Result<Vec<Element<'s>>, Error> {
122        match_token! { self.lexer; L(T::LCurl, _, _) => self.lexer.next() };
123        let mut idents = vec![self.parse_ident()?];
124
125        loop {
126            match_token! { self.lexer;
127                L(T::RCurl, _, _) => {
128                    self.lexer.next();
129                    break;
130                },
131                L(T::Dot, off, _) => {
132                    self.lexer.next();
133
134                    if idents.len() >= self.max_depth {
135                        return Err(Error::TooDeep {
136                            max: self.max_depth,
137                            off,
138                        });
139                    }
140
141                    idents.push(self.parse_ident()?);
142                }
143            };
144        }
145
146        Ok(idents)
147    }
148
149    fn parse_ident(&mut self) -> Result<Element<'s>, Error> {
150        Ok(match_token! { self.lexer;
151            L(T::Ident, off, ident) => {
152                self.lexer.next();
153                if identifier::is_valid(ident) {
154                    Element::Field(ident)
155                } else {
156                    return Err(Error::InvalidIdentifier { ident: ident.to_string(), off });
157                }
158            }
159        })
160    }
161}
162
163impl fmt::Display for Strand<'_> {
164    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
165        match self {
166            Strand::Text(text) => write!(f, "{text:?}"),
167            Strand::Expr(path) => {
168                let mut prefix = "";
169                for field in path {
170                    let Element::Field(name) = field else {
171                        unreachable!("unexpected non-field element in path");
172                    };
173
174                    write!(f, "{prefix}{name}")?;
175                    prefix = ".";
176                }
177                Ok(())
178            }
179        }
180    }
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// Depth limit for tests that are not exercising the limit itself.
    const DEPTH: usize = 10;

    /// The path element the parser produces for a field called `f`.
    fn field(f: &str) -> Element<'_> {
        Element::Field(f)
    }

    /// The strand the parser produces for a field access path through `names`.
    fn expr<'s>(names: &[&'s str]) -> Strand<'s> {
        Strand::Expr(names.iter().map(|&name| field(name)).collect())
    }

    /// The strand the parser produces for the literal text `s`.
    fn text(s: &str) -> Strand<'_> {
        Strand::Text(s.into())
    }

    /// Parse `src`, which is expected to be well-formed.
    fn parse(src: &str) -> Vec<Strand<'_>> {
        Parser::new(DEPTH, src).parse_format().unwrap()
    }

    /// Parse `src`, which is expected to be rejected, and return the rendered error message.
    fn parse_err(max_depth: usize, src: &str) -> String {
        Parser::new(max_depth, src)
            .parse_format()
            .unwrap_err()
            .to_string()
    }

    #[test]
    fn test_literal_string() {
        assert_eq!(parse("foo bar"), vec![text("foo bar")]);
    }

    #[test]
    fn test_field_expr() {
        assert_eq!(parse("{foo}"), vec![expr(&["foo"])]);
    }

    #[test]
    fn test_compound_expr() {
        assert_eq!(parse("{foo.bar.baz}"), vec![expr(&["foo", "bar", "baz"])]);
    }

    #[test]
    fn test_text_with_escape() {
        assert_eq!(
            parse(r#"foo \{bar\} baz"#),
            vec![text(r#"foo {bar} baz"#)],
        );
    }

    #[test]
    fn test_escape_chain() {
        assert_eq!(parse(r#"\\\\\\\\\"#), vec![text(r#"\\\\\"#)]);
    }

    #[test]
    fn test_back_to_back_exprs() {
        assert_eq!(
            parse("{foo . bar}{baz.qux}"),
            vec![expr(&["foo", "bar"]), expr(&["baz", "qux"])],
        );
    }

    #[test]
    fn test_bad_identifier() {
        assert_eq!(
            parse_err(DEPTH, "{foo.bar.baz!}"),
            "Invalid identifier \"baz!\" at offset 9",
        );
    }

    #[test]
    fn test_unexpected_lcurly() {
        assert_eq!(
            parse_err(DEPTH, "{foo{}}"),
            "Unexpected '{' at offset 4, expected one of '}', or '.'",
        );
    }

    #[test]
    fn test_unexpected_rcurly() {
        assert_eq!(
            parse_err(DEPTH, "foo bar}"),
            "Unexpected '}' at offset 7, expected one of text, an escaped character, or '{'",
        );
    }

    #[test]
    fn test_no_dot() {
        assert_eq!(
            parse_err(DEPTH, "{foo bar}"),
            "Unexpected identifier \"bar\" at offset 5, expected one of '}', or '.'",
        );
    }

    #[test]
    fn test_empty_expr() {
        assert_eq!(
            parse_err(DEPTH, "foo {} bar"),
            "Unexpected '}' at offset 5, expected an identifier",
        );
    }

    #[test]
    fn test_unexpected_eos() {
        assert_eq!(
            parse_err(DEPTH, "foo {bar"),
            "Unexpected end-of-string, expected one of '}', or '.'",
        );
    }

    #[test]
    fn test_too_deep() {
        assert_eq!(
            parse_err(2, "{foo.bar.baz}"),
            "Field access at offset 8 deeper than the maximum of 2",
        );
    }
}