orbit/parser/
tokenizer.rs

1//! Tokenizer for .orbit files
2
3use std::iter::Peekable;
4use std::str::Chars;
5
/// Token types that can appear in a template.
///
/// Every payload is an owned `String`, so the type supports full equality
/// (`Eq`) in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
// Several variants are part of the token vocabulary but are never constructed
// by the tokenizer in this file (e.g. `AttrValue`, `Quote`); silence the lint.
#[allow(dead_code)]
pub enum Token {
    // Template tokens
    /// `<name>` or `<name` followed by whitespace (attributes follow as separate tokens).
    OpenTag(String),
    /// `</name>`.
    CloseTag(String),
    /// `<name/>`.
    SelfClosingTag(String),
    /// Attribute name; event handlers keep their `@` prefix (e.g. `@click`).
    AttrName(String),
    AttrValue(String),
    /// Raw text between other tokens.
    Text(String),

    // Expression tokens
    ExprStart, // {{
    ExprEnd,   // }}

    // Punctuation
    Equal, // =
    Quote, // " or '

    // Delimiters
    OpenBrace,  // {
    CloseBrace, // }
    OpenParen,  // (
    CloseParen, // )

    // Expression operators
    Dot,   // .
    Comma, // ,
    Plus,  // +
    Minus, // -
    Star,  // *
    Slash, // /

    // Keywords
    Identifier(String),
    /// Digits with at most one decimal point, kept as source text.
    Number(String),
    /// Contents of a quoted string literal, without the quotes.
    String(String),

    // Special
    /// End of input.
    Eof,
    /// A lexical error with a human-readable message.
    Error(String),
}

/// Tokenizes an input string into a sequence of tokens.
pub struct Tokenizer<'a> {
    /// Remaining input characters.
    input: Peekable<Chars<'a>>,
    /// 1-based line of the next unread character.
    line: usize,
    /// Number of characters already consumed on the current line.
    column: usize,
}

impl<'a> Tokenizer<'a> {
    /// Create a new tokenizer for the given input.
    pub fn new(input: &'a str) -> Self {
        Self {
            input: input.chars().peekable(),
            line: 1,
            column: 0,
        }
    }

    /// Get the next token from the input.
    ///
    /// Leading whitespace is skipped first. Returns [`Token::Eof`] once the
    /// input is exhausted and [`Token::Error`] for unterminated tags or
    /// string literals.
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace();

        let ch = match self.peek() {
            Some(ch) => ch,
            None => return Token::Eof,
        };

        match ch {
            '<' => self.read_tag(),
            '{' | '}' => self.read_brace(ch),
            '@' => self.read_event_attr(),
            '"' | '\'' => self.read_string(),
            '=' => self.consume_as(Token::Equal),
            '.' => self.consume_as(Token::Dot),
            ',' => self.consume_as(Token::Comma),
            '+' => self.consume_as(Token::Plus),
            '-' => self.consume_as(Token::Minus),
            '*' => self.consume_as(Token::Star),
            '/' => self.consume_as(Token::Slash),
            '(' => self.consume_as(Token::OpenParen),
            ')' => self.consume_as(Token::CloseParen),
            c if c.is_ascii_digit() => self.read_number(),
            c if c.is_alphabetic() || c == '_' => self.read_attr_or_identifier(),
            _ => self.read_text(),
        }
    }

    /// Consume exactly one character and return `token` for it.
    fn consume_as(&mut self, token: Token) -> Token {
        self.advance();
        token
    }

    /// Consume characters while `keep` returns `true`, returning them as a string.
    fn consume_while(&mut self, mut keep: impl FnMut(char) -> bool) -> String {
        let mut out = String::new();
        while let Some(ch) = self.peek() {
            if !keep(ch) {
                break;
            }
            out.push(ch);
            self.advance();
        }
        out
    }

    /// Read `{`, `{{`, `}` or `}}`; `brace` is the character currently peeked.
    fn read_brace(&mut self, brace: char) -> Token {
        let doubled = self.peek_next() == Some(brace);
        self.advance();
        if doubled {
            self.advance();
        }
        match (brace, doubled) {
            ('{', true) => Token::ExprStart,
            ('{', false) => Token::OpenBrace,
            (_, true) => Token::ExprEnd,
            (_, false) => Token::CloseBrace,
        }
    }

    /// Read an `@event` attribute name; the `@` prefix is kept in the name.
    fn read_event_attr(&mut self) -> Token {
        self.advance(); // Skip @
        let mut name = String::from("@");
        name.push_str(&self.consume_while(|ch| ch.is_alphanumeric() || ch == '-'));
        Token::AttrName(name)
    }

    /// Read a word and classify it as an attribute name or an identifier.
    ///
    /// A word (letters, digits, `_`, `-`) followed by optional whitespace and
    /// `=` is an attribute name; the word and that whitespace are consumed and
    /// the `=` is left for the next call. Anything else is read as a plain
    /// identifier, which does not include `-`.
    ///
    /// The decision is made on a cloned iterator so the real input (and the
    /// line/column counters) are only touched once the outcome is known.
    fn read_attr_or_identifier(&mut self) -> Token {
        let mut lookahead = self.input.clone();
        let mut name = String::new();
        while let Some(&ch) = lookahead.peek() {
            if !(ch.is_alphanumeric() || ch == '_' || ch == '-') {
                break;
            }
            name.push(ch);
            lookahead.next();
        }
        while lookahead.peek().map_or(false, |ch| ch.is_whitespace()) {
            lookahead.next();
        }

        if lookahead.peek() != Some(&'=') {
            return self.read_identifier();
        }

        // Commit: consume the name through `advance` so positions stay correct.
        for _ in name.chars() {
            self.advance();
        }
        self.skip_whitespace();
        Token::AttrName(name)
    }

    /// Read a complete tag (opening, closing, or self-closing).
    ///
    /// For an opening tag only the name is read: scanning stops at the first
    /// whitespace so that attributes are tokenized by subsequent calls.
    /// Returns `Token::Error("Unclosed tag")` if the input ends first.
    fn read_tag(&mut self) -> Token {
        self.advance(); // Skip <

        if self.peek() == Some('/') {
            self.advance(); // Skip /
            let name = self.consume_while(|ch| ch != '>');
            return match self.advance() {
                // Whitespace around the name (`</div >`) is not part of it.
                Some(_) => Token::CloseTag(name.trim().to_string()),
                None => Token::Error("Unclosed tag".to_string()),
            };
        }

        let mut name = String::new();
        while let Some(ch) = self.peek() {
            match ch {
                '>' => {
                    self.advance();
                    return Token::OpenTag(name);
                }
                '/' => {
                    // A `/` not followed by `>` is dropped and scanning continues.
                    self.advance();
                    if self.peek() == Some('>') {
                        self.advance();
                        return Token::SelfClosingTag(name);
                    }
                }
                c if c.is_whitespace() => {
                    // Stop at whitespace, the attribute parsing will continue from here
                    return Token::OpenTag(name);
                }
                _ => {
                    name.push(ch);
                    self.advance();
                }
            }
        }

        Token::Error("Unclosed tag".to_string())
    }

    // Event handlers are now handled directly in the next_token method
    // This method is kept as a placeholder to avoid having to update all references
    #[allow(dead_code)]
    fn read_event_handler(&mut self) -> Token {
        Token::Error("EventHandler is deprecated".to_string())
    }

    /// Read a string literal delimited by the quote character at the cursor.
    ///
    /// No escape sequences are interpreted. Returns
    /// `Token::Error("Unclosed string literal")` if the closing quote is missing.
    fn read_string(&mut self) -> Token {
        let quote = match self.advance() {
            Some(q) => q,
            None => return Token::Error("Unclosed string literal".to_string()),
        };
        let value = self.consume_while(|ch| ch != quote);

        match self.advance() {
            Some(_) => Token::String(value),
            None => Token::Error("Unclosed string literal".to_string()),
        }
    }

    /// Read a number literal: digits, optionally followed by `.` and more digits.
    ///
    /// A `.` is only taken as a decimal point when a digit follows it, so
    /// `1.2.3` yields `1.2`, `.`, `3` and `3.x` yields `3`, `.`, `x`.
    fn read_number(&mut self) -> Token {
        let mut number = self.consume_while(|ch| ch.is_ascii_digit());

        let has_fraction = self.peek() == Some('.')
            && self.peek_next().map_or(false, |d| d.is_ascii_digit());
        if has_fraction {
            self.advance(); // Skip .
            number.push('.');
            number.push_str(&self.consume_while(|ch| ch.is_ascii_digit()));
        }

        Token::Number(number)
    }

    /// Read an identifier (letters, digits and `_`).
    fn read_identifier(&mut self) -> Token {
        Token::Identifier(self.consume_while(|ch| ch.is_alphanumeric() || ch == '_'))
    }

    /// Read a text node up to the next `<`, `{`, `@` or `=`.
    ///
    /// A leading `>` is dropped: it is the end of a tag whose attributes were
    /// tokenized separately. If nothing remains after that, or only whitespace
    /// remains before another tag, no `Text` token is produced and the next
    /// real token is returned instead.
    fn read_text(&mut self) -> Token {
        let first = match self.advance() {
            Some(ch) => ch,
            None => return Token::Eof,
        };

        let mut text = String::new();
        if first != '>' {
            text.push(first);
        }
        text.push_str(&self.consume_while(|ch| !matches!(ch, '<' | '{' | '@' | '=')));

        // Only reachable after a dropped `>`; the cursor is now at a delimiter
        // or EOF, both of which `next_token` handles without coming back here.
        if text.is_empty() {
            return self.next_token();
        }

        // Skip whitespace tokens entirely between elements
        if text.trim().is_empty() && self.peek() == Some('<') {
            return self.next_token();
        }

        Token::Text(text)
    }

    /// Skip whitespace characters.
    fn skip_whitespace(&mut self) {
        while self.peek().map_or(false, char::is_whitespace) {
            self.advance();
        }
    }

    /// Peek at the next character without consuming it.
    fn peek(&mut self) -> Option<char> {
        self.input.peek().copied()
    }

    /// Peek at the character after the next one.
    fn peek_next(&self) -> Option<char> {
        let mut iter = self.input.clone();
        iter.next(); // Skip current
        iter.next() // Get next
    }

    /// Consume one character, updating `line`/`column`.
    ///
    /// This is the only place the position counters change, so every consumed
    /// character is counted exactly once.
    fn advance(&mut self) -> Option<char> {
        let ch = self.input.next()?;
        if ch == '\n' {
            self.line += 1;
            self.column = 0;
        } else {
            self.column += 1;
        }
        Some(ch)
    }
}