toml_query/
tokenizer.rs

1/// The tokenizer for the query interpreter
2use crate::error::{Error, Result};
3
/// One segment of a tokenized query, linked to the segment that follows it.
///
/// A query is represented as a singly linked chain: every token owns the
/// token that comes after it through its `next` field, and the last token of
/// the chain has `next == None`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A named segment of the query (any segment not enclosed in `[` and `]`).
    Identifier {
        /// The segment text exactly as it appeared in the query.
        ident: String,
        /// The token following this one, or `None` at the end of the chain.
        next: Option<Box<Token>>,
    },

    /// A numeric segment of the query, written as `[N]`.
    Index {
        /// The number that was written between the brackets.
        idx: usize,
        /// The token following this one, or `None` at the end of the chain.
        next: Option<Box<Token>>,
    },
}
16
17impl Token {
18    pub fn next(&self) -> Option<&Token> {
19        trace!("Matching token (self): {:?}", self);
20        match self {
21            Token::Identifier { ref next, .. } => next.as_ref().map(|t| &**t),
22            Token::Index { ref next, .. } => next.as_ref().map(|t| &**t),
23        }
24    }
25
26    /// Convenience function for `token.next().is_some()`
27    pub fn has_next(&self) -> bool {
28        trace!("self.has_next(): {:?}", self.next().is_some());
29        self.next().is_some()
30    }
31
32    pub fn set_next(&mut self, token: Token) {
33        trace!("self.set_next({:?})", token);
34        match self {
35            Token::Identifier { ref mut next, .. } => *next = Some(Box::new(token)),
36            Token::Index { ref mut next, .. } => *next = Some(Box::new(token)),
37        }
38    }
39
40    /// Pop the last token from the chain of tokens
41    ///
42    /// Returns None if the current Token has no next token
43    pub fn pop_last(&mut self) -> Option<Box<Token>> {
44        trace!("self.pop_last()");
45        if !self.has_next() {
46            trace!("self.pop_last(): No next");
47            None
48        } else {
49            trace!("self.pop_last(): Having next");
50            match self {
51                Token::Identifier { ref mut next, .. } => {
52                    trace!("self.pop_last(): self is Identifier");
53                    if next.is_some() {
54                        trace!("self.pop_last(): next is Some(_)");
55                        let mut n = next.take().unwrap();
56                        if n.has_next() {
57                            trace!("self.pop_last(): next also has a next");
58
59                            trace!("self.pop_last(): Recursing now");
60                            let result = n.pop_last();
61
62                            *next = Some(n);
63
64                            trace!("self.pop_last(): Returning Result");
65                            result
66                        } else {
67                            trace!("self.pop_last(): next itself has no next, returning Some");
68                            Some(n)
69                        }
70                    } else {
71                        trace!("self.pop_last(): next is none, returning None");
72                        None
73                    }
74                }
75
76                Token::Index { ref mut next, .. } => {
77                    trace!("self.pop_last(): self is Index");
78                    if next.is_some() {
79                        trace!("self.pop_last(): next is Some(_)");
80
81                        let mut n = next.take().unwrap();
82                        if n.has_next() {
83                            trace!("self.pop_last(): next also has a next");
84
85                            trace!("self.pop_last(): Recursing now");
86                            let result = n.pop_last();
87
88                            *next = Some(n);
89
90                            trace!("self.pop_last(): Returning Result");
91                            result
92                        } else {
93                            trace!("self.pop_last(): next itself has no next, returning Some");
94                            Some(n)
95                        }
96                    } else {
97                        trace!("self.pop_last(): next is none, returning None");
98                        None
99                    }
100                }
101            }
102        }
103    }
104
105    #[cfg(test)]
106    pub fn identifier(&self) -> &String {
107        trace!("self.identifier()");
108        match self {
109            Token::Identifier { ref ident, .. } => &ident,
110            _ => unreachable!(),
111        }
112    }
113
114    #[cfg(test)]
115    pub fn idx(&self) -> usize {
116        trace!("self.idx()");
117        match self {
118            Token::Index { idx: i, .. } => *i,
119            _ => unreachable!(),
120        }
121    }
122}
123
124pub fn tokenize_with_seperator(query: &str, seperator: char) -> Result<Token> {
125    use std::str::Split;
126    trace!(
127        "tokenize_with_seperator(query: {:?}, seperator: {:?})",
128        query,
129        seperator
130    );
131
132    /// Creates a Token object from a string
133    ///
134    /// # Panics
135    ///
136    /// * If the internal regex does not compile (should never happen)
137    /// * If the token is non-valid (that is, a array index with a non-i64)
138    /// * If the regex does not find anything
139    /// * If the integer in the brackets (`[]`) cannot be parsed to a valid i64
140    ///
141    /// # Incorrect behaviour
142    ///
143    /// * If the regex finds multiple captures
144    ///
145    /// # Returns
146    ///
147    /// The `Token` object with the correct identifier/index for this token and no next token.
148    ///
149    fn mk_token_object(s: &str) -> Result<Token> {
150        use regex::Regex;
151        use std::str::FromStr;
152
153        trace!("mk_token_object(s: {:?})", s);
154
155        lazy_static! {
156            static ref RE: Regex = Regex::new(r"^\[\d+\]$").unwrap();
157        }
158
159        if !has_array_brackets(s) {
160            trace!("returning Ok(Identifier(ident: {:?}, next: None))", s);
161            return Ok(Token::Identifier {
162                ident: String::from(s),
163                next: None,
164            });
165        }
166
167        match RE.captures(s) {
168            None => Err(Error::ArrayAccessWithoutIndex),
169            Some(captures) => {
170                trace!("Captured: {:?}", captures);
171                match captures.get(0) {
172                    None => Ok(Token::Identifier {
173                        ident: String::from(s),
174                        next: None,
175                    }),
176                    Some(mtch) => {
177                        trace!("First capture: {:?}", mtch);
178
179                        let mtch = without_array_brackets(mtch.as_str());
180                        trace!(".. without array brackets: {:?}", mtch);
181
182                        let i: usize = FromStr::from_str(&mtch).unwrap(); // save because regex
183
184                        trace!("returning Ok(Index(idx: {}, next: None)", i);
185                        Ok(Token::Index { idx: i, next: None })
186                    }
187                }
188            }
189        }
190    }
191
192    /// Check whether a str begins with '[' and ends with ']'
193    fn has_array_brackets(s: &str) -> bool {
194        trace!("has_array_brackets({:?})", s);
195        s.as_bytes()[0] == b'[' && s.as_bytes()[s.len() - 1] == b']'
196    }
197
198    /// Remove '[' and ']' from a str
199    fn without_array_brackets(s: &str) -> String {
200        trace!("without_array_brackets({:?})", s);
201        s.replace("[", "").replace("]", "")
202    }
203
204    fn build_token_tree(split: &mut Split<'_, char>, last: &mut Token) -> Result<()> {
205        trace!("build_token_tree(split: {:?}, last: {:?})", split, last);
206        match split.next() {
207            None => { /* No more tokens */ }
208            Some(token) => {
209                trace!("build_token_tree(...): next from split: {:?}", token);
210
211                if token.is_empty() {
212                    trace!("build_token_tree(...): Empty identifier... returning Error");
213                    return Err(Error::EmptyIdentifier);
214                }
215
216                let mut token = mk_token_object(token)?;
217                build_token_tree(split, &mut token)?;
218                last.set_next(token);
219            }
220        }
221
222        trace!("build_token_tree(...): returning Ok(())");
223        Ok(())
224    }
225
226    if query.is_empty() {
227        trace!("Query is empty. Returning error");
228        return Err(Error::EmptyQueryError);
229    }
230
231    let mut tokens = query.split(seperator);
232    trace!("Tokens splitted: {:?}", tokens);
233
234    match tokens.next() {
235        None => Err(Error::EmptyQueryError),
236        Some(token) => {
237            trace!("next Token: {:?}", token);
238
239            if token.is_empty() {
240                trace!("Empty token. Returning Error");
241                return Err(Error::EmptyIdentifier);
242            }
243
244            let mut tok = mk_token_object(token)?;
245            build_token_tree(&mut tokens, &mut tok)?;
246
247            trace!("Returning Ok({:?})", tok);
248            Ok(tok)
249        }
250    }
251}
252
#[cfg(test)]
mod test {
    use super::*;
    use crate::error::Error;

    /// Tokenizes `query` with `.` as separator; fails the test on an error.
    fn tokenize_ok(query: &str) -> Token {
        tokenize_with_seperator(query, '.').expect("query should tokenize")
    }

    /// Tokenizes `query` with `.` as separator; fails the test on success.
    fn tokenize_err(query: &str) -> Error {
        tokenize_with_seperator(query, '.').expect_err("query should be rejected")
    }

    /// Builds an `Identifier` token with the given successor.
    fn ident(name: &str, next: Option<Token>) -> Token {
        Token::Identifier {
            ident: String::from(name),
            next: next.map(Box::new),
        }
    }

    /// Builds an `Index` token with the given successor.
    fn index(idx: usize, next: Option<Token>) -> Token {
        Token::Index {
            idx,
            next: next.map(Box::new),
        }
    }

    /// Pops the last token of the chain; fails the test if there is none.
    fn pop_some(chain: &mut Token) -> Box<Token> {
        chain.pop_last().expect("chain should have a last token")
    }

    #[test]
    fn test_tokenize_empty_query_to_error() {
        let err = tokenize_err("");
        assert!(is_match!(err, Error::EmptyQueryError { .. }));
    }

    #[test]
    fn test_tokenize_seperator_only() {
        let err = tokenize_err(".");
        assert!(is_match!(err, Error::EmptyIdentifier { .. }));
    }

    #[test]
    fn test_tokenize_array_brackets_only() {
        let err = tokenize_err("[]");
        assert!(is_match!(err, Error::ArrayAccessWithoutIndex { .. }));
    }

    #[test]
    fn test_tokenize_identifiers_with_array_brackets_only() {
        let err = tokenize_err("a.b.c.[]");
        assert!(is_match!(err, Error::ArrayAccessWithoutIndex { .. }));
    }

    #[test]
    fn test_tokenize_identifiers_in_array_brackets() {
        let err = tokenize_err("[a]");
        assert!(is_match!(err, Error::ArrayAccessWithoutIndex { .. }));
    }

    #[test]
    fn test_tokenize_single_token_query() {
        assert_eq!(ident("example", None), tokenize_ok("example"));
    }

    #[test]
    fn test_tokenize_double_token_query() {
        let tokens = tokenize_ok("a.b");

        assert_eq!("a", tokens.identifier());
        assert_eq!(ident("a", Some(ident("b", None))), tokens);
    }

    #[test]
    fn test_tokenize_ident_then_array_query() {
        let tokens = tokenize_ok("a.[0]");

        assert_eq!("a", tokens.identifier());
        assert_eq!(ident("a", Some(index(0, None))), tokens);
    }

    #[test]
    fn test_tokenize_many_idents_then_array_query() {
        let tokens = tokenize_ok("a.b.c.[1000]");
        assert_eq!("a", tokens.identifier());

        let tail = index(1000, None);
        let tail = ident("c", Some(tail));
        let tail = ident("b", Some(tail));
        let expected = ident("a", Some(tail));

        assert_eq!(expected, tokens);
    }

    #[test]
    fn test_tokenize_empty_token_after_good_token() {
        let err = tokenize_err("a..b");
        assert!(is_match!(err, Error::EmptyIdentifier { .. }));
    }

    quickcheck! {
        fn test_array_index(i: usize) -> bool {
            let query = format!("[{}]", i);
            is_match!(
                tokenize_with_seperator(&query, '.'),
                Ok(Token::Index { next: None, .. })
            )
        }
    }

    #[test]
    fn test_pop_last_token_from_single_identifier_token_is_none() {
        let mut token = ident("something", None);
        assert!(token.pop_last().is_none());
    }

    #[test]
    fn test_pop_last_token_from_single_index_token_is_none() {
        let mut token = index(0, None);
        assert!(token.pop_last().is_none());
    }

    #[test]
    fn test_pop_last_token_from_single_identifier_token_is_one() {
        let mut token = ident("some", Some(ident("thing", None)));

        let last = pop_some(&mut token);

        match *last {
            Token::Identifier { ident, .. } => assert_eq!("thing", ident),
            _ => panic!("What just happened?"),
        }
    }

    #[test]
    fn test_pop_last_token_from_single_index_token_is_one() {
        let mut token = index(0, Some(index(1, None)));

        let last = pop_some(&mut token);

        assert!(is_match!(*last, Token::Index { idx: 1, .. }));
    }

    #[test]
    fn test_pop_last_token_from_identifier_chain() {
        let mut tokens = tokenize_ok("a.b.c.d.e.f");
        assert_eq!("f", pop_some(&mut tokens).identifier());
    }

    #[test]
    fn test_pop_last_token_from_mixed_chain() {
        let mut tokens = tokenize_ok("a.[100].c.[3].e.f");
        assert_eq!("f", pop_some(&mut tokens).identifier());
    }

    #[test]
    fn test_pop_last_token_from_identifier_chain_is_array() {
        let mut tokens = tokenize_ok("a.b.c.d.e.f.[1000]");
        assert_eq!(1000, pop_some(&mut tokens).idx());
    }

    #[test]
    fn test_pop_last_token_from_mixed_chain_is_array() {
        let mut tokens = tokenize_ok("a.[100].c.[3].e.f.[1000]");
        assert_eq!(1000, pop_some(&mut tokens).idx());
    }

    #[test]
    fn test_pop_last_token_from_one_token() {
        let mut tokens = tokenize_ok("a");
        assert!(tokens.pop_last().is_none());
    }

    #[test]
    fn test_pop_last_chain() {
        let mut tokens = tokenize_ok("a.[100].c.[3].e.f.[1000]");

        // Tokens come off the chain back to front, one per call.
        assert_eq!(1000, pop_some(&mut tokens).idx());
        assert_eq!("f", pop_some(&mut tokens).identifier());
        assert_eq!("e", pop_some(&mut tokens).identifier());
        assert_eq!(3, pop_some(&mut tokens).idx());
        assert_eq!("c", pop_some(&mut tokens).identifier());
        assert_eq!(100, pop_some(&mut tokens).idx());

        // Only the head is left, which has nothing to pop.
        assert!(tokens.pop_last().is_none());
    }
}
550        assert!(last.is_none());
551    }
552}