// orbit/parser/tokenizer.rs
use std::iter::Peekable;
use std::str::Chars;

/// A lexical token produced by [`Tokenizer`].
///
/// Variants that carry a `String` own the piece of source text they were
/// built from.
#[derive(Debug, Clone, PartialEq, Eq)]
// Not every variant is constructed by the tokenizer in this file (for example
// `AttrValue` and `Quote`), hence the blanket `dead_code` allowance.
#[allow(dead_code)]
pub enum Token {
    /// Opening tag such as `<div>`; holds the tag name.
    OpenTag(String),
    /// Closing tag such as `</div>`; holds the text between `</` and `>`.
    CloseTag(String),
    /// Self-closing tag such as `<br/>`; holds the tag name.
    SelfClosingTag(String),
    /// Attribute name. `@`-prefixed names keep their leading `@`.
    AttrName(String),
    /// Attribute value.
    AttrValue(String),
    /// Free-form text.
    Text(String),

    /// `{{`
    ExprStart,
    /// `}}`
    ExprEnd,
    /// `=`
    Equal,
    /// A quote character.
    Quote,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `.`
    Dot,
    /// `,`
    Comma,
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// Identifier made of alphanumeric characters and `_`.
    Identifier(String),
    /// Numeric literal, kept as its source text.
    Number(String),
    /// Quoted string literal; holds the text between the quotes.
    String(String),

    /// End of input.
    Eof,
    /// Tokenization failure; holds a human-readable message.
    Error(String),
}

/// Streaming tokenizer over a borrowed source string.
///
/// Holds a peekable character cursor together with line/column counters
/// for the current position.
pub struct Tokenizer<'a> {
    /// Remaining input, with one character of lookahead.
    input: Peekable<Chars<'a>>,
    /// Current line number; starts at 1.
    line: usize,
    /// Column counter for the current line; starts at 0.
    column: usize,
}

58impl<'a> Tokenizer<'a> {
59 pub fn new(input: &'a str) -> Self {
61 Self {
62 input: input.chars().peekable(),
63 line: 1,
64 column: 0,
65 }
66 }
67
68 pub fn next_token(&mut self) -> Token {
70 self.skip_whitespace();
71
72 match self.peek() {
73 None => Token::Eof,
74 Some(ch) => match ch {
75 '<' => self.read_tag(),
76 '{' => {
77 if self.peek_next() == Some('{') {
78 self.advance(); self.advance(); Token::ExprStart
81 } else {
82 self.advance();
83 Token::OpenBrace
84 }
85 }
86 '}' => {
87 if self.peek_next() == Some('}') {
88 self.advance(); self.advance(); Token::ExprEnd
91 } else {
92 self.advance();
93 Token::CloseBrace
94 }
95 }
96 '@' => {
97 self.advance(); let mut name = String::new();
100 name.push('@'); while let Some(ch) = self.peek() {
103 if ch.is_alphanumeric() || ch == '-' {
104 name.push(ch);
105 self.advance();
106 } else {
107 break;
108 }
109 }
110 Token::AttrName(name)
111 }
112 '=' => {
113 self.advance();
114 Token::Equal
115 }
116 '"' | '\'' => self.read_string(),
117 '.' => {
118 self.advance();
119 Token::Dot
120 }
121 ',' => {
122 self.advance();
123 Token::Comma
124 }
125 '+' => {
126 self.advance();
127 Token::Plus
128 }
129 '-' => {
130 self.advance();
131 Token::Minus
132 }
133 '*' => {
134 self.advance();
135 Token::Star
136 }
137 '/' => {
138 self.advance();
139 Token::Slash
140 }
141 '(' => {
142 self.advance();
143 Token::OpenParen
144 }
145 ')' => {
146 self.advance();
147 Token::CloseParen
148 }
149 ch if ch.is_ascii_digit() => self.read_number(),
150 ch if ch.is_alphabetic() || ch == '_' => {
151 let saved_pos = self.input.clone();
153 let mut ident = String::new();
154
155 while let Some(ch) = self.peek() {
156 if ch.is_alphanumeric() || ch == '_' || ch == '-' {
157 ident.push(ch);
158 self.advance();
159 } else {
160 break;
161 }
162 }
163
164 self.skip_whitespace();
166
167 if self.peek() == Some('=') {
169 Token::AttrName(ident)
170 } else {
171 self.input = saved_pos;
173 self.read_identifier()
174 }
175 }
176 _ch => self.read_text(),
177 },
178 }
179 }
180
181 fn read_tag(&mut self) -> Token {
183 self.advance(); let mut name = String::new();
185
186 if self.peek() == Some('/') {
187 self.advance(); while let Some(ch) = self.peek() {
189 if ch == '>' {
190 self.advance();
191 return Token::CloseTag(name);
192 }
193 name.push(ch);
194 self.advance();
195 }
196 }
197
198 while let Some(ch) = self.peek() {
200 match ch {
201 '>' => {
202 self.advance();
203 return Token::OpenTag(name);
204 }
205 '/' => {
206 self.advance();
207 if self.peek() == Some('>') {
208 self.advance();
209 return Token::SelfClosingTag(name);
210 }
211 }
212 ch if ch.is_whitespace() => {
213 return Token::OpenTag(name);
215 }
216 _ => {
217 name.push(ch);
218 self.advance();
219 }
220 }
221 }
222
223 Token::Error("Unclosed tag".to_string())
224 }
225
226 #[allow(dead_code)]
229 fn read_event_handler(&mut self) -> Token {
230 Token::Error("EventHandler is deprecated".to_string())
231 }
232
233 fn read_string(&mut self) -> Token {
235 let quote = self.advance().unwrap();
236 let mut value = String::new();
237
238 while let Some(ch) = self.peek() {
239 if ch == quote {
240 self.advance();
241 return Token::String(value);
242 }
243 value.push(ch);
244 self.advance();
245 }
246
247 Token::Error("Unclosed string literal".to_string())
248 }
249
250 fn read_number(&mut self) -> Token {
252 let mut number = String::new();
253
254 while let Some(ch) = self.peek() {
255 if ch.is_ascii_digit() || ch == '.' {
256 number.push(ch);
257 self.advance();
258 } else {
259 break;
260 }
261 }
262
263 Token::Number(number)
264 }
265
266 fn read_identifier(&mut self) -> Token {
268 let mut ident = String::new();
269
270 while let Some(ch) = self.peek() {
271 if ch.is_alphanumeric() || ch == '_' {
272 ident.push(ch);
273 self.advance();
274 } else {
275 break;
276 }
277 }
278
279 Token::Identifier(ident)
280 }
281
282 fn read_text(&mut self) -> Token {
284 let mut text = String::new();
285
286 if let Some(ch) = self.advance() {
288 if ch != '>' {
291 text.push(ch);
292 }
293 } else {
294 return Token::Eof;
295 }
296
297 while let Some(ch) = self.peek() {
299 if ch == '<' || ch == '{' || ch == '@' || ch == '=' {
300 break;
301 }
302 text.push(ch);
303 self.advance();
304 }
305
306 if text.trim().is_empty() {
308 if self.peek() == Some('<') {
310 return self.next_token();
311 }
312 }
313
314 Token::Text(text)
315 }
316
317 fn skip_whitespace(&mut self) {
319 while let Some(ch) = self.peek() {
320 if ch.is_whitespace() {
321 if ch == '\n' {
322 self.line += 1;
323 self.column = 0;
324 } else {
325 self.column += 1;
326 }
327 self.advance();
328 } else {
329 break;
330 }
331 }
332 }
333
334 fn peek(&mut self) -> Option<char> {
336 self.input.peek().copied()
337 }
338
339 fn peek_next(&mut self) -> Option<char> {
341 let mut iter = self.input.clone();
342 iter.next(); iter.next() }
345
346 fn advance(&mut self) -> Option<char> {
348 let ch = self.input.next();
349 if let Some(_ch) = ch {
350 self.column += 1;
351 }
352 ch
353 }
354}