litrs/
parse.rs

1use crate::{
2    BoolLit,
3    Buffer,
4    ByteLit,
5    ByteStringLit,
6    CharLit,
7    ParseError,
8    FloatLit,
9    IntegerLit,
10    Literal,
11    StringLit,
12    err::{perr, ParseErrorKind::{*, self}},
13};
14
15
16pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> {
17    let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?;
18    let second = input.as_bytes().get(1).copied();
19
20    match first {
21        b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)),
22        b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)),
23
24        // A number literal (integer or float).
25        b'0'..=b'9' => {
26            // To figure out whether this is a float or integer, we do some
27            // quick inspection here. Yes, this is technically duplicate
28            // work with what is happening in the integer/float parse
29            // methods, but it makes the code way easier for now and won't
30            // be a huge performance loss.
31            //
32            // The first non-decimal char in a float literal must
33            // be '.', 'e' or 'E'.
34            match input.as_bytes().get(1 + end_dec_digits(rest)) {
35                Some(b'.') | Some(b'e') | Some(b'E')
36                    => FloatLit::parse(input).map(Literal::Float),
37
38                _ => IntegerLit::parse(input).map(Literal::Integer),
39            }
40        },
41
42        b'\'' => CharLit::parse(input).map(Literal::Char),
43        b'"' | b'r' => StringLit::parse(input).map(Literal::String),
44
45        b'b' if second == Some(b'\'') => ByteLit::parse(input).map(Literal::Byte),
46        b'b' if second == Some(b'r') || second == Some(b'"')
47            => ByteStringLit::parse(input).map(Literal::ByteString),
48
49        _ => Err(perr(None, InvalidLiteral)),
50    }
51}
52
53
54pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> {
55    s.as_bytes().get(0).copied().ok_or(perr(None, Empty))
56}
57
/// Returns the length of the leading run of ASCII decimal digits and
/// underscores in `input`.
///
/// Put differently: the index of the first byte that is neither `_` nor one
/// of `0`..=`9`, or `input.len()` if every byte is a digit or an underscore
/// (which includes the empty input).
pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
    input
        .iter()
        .take_while(|&&byte| byte == b'_' || byte.is_ascii_digit())
        .count()
}
65
/// Returns the numeric value (0 to 15) of the ASCII hexadecimal digit
/// `digit`, accepting both lowercase and uppercase letters, or `None` if
/// `digit` is not a hexadecimal digit.
pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
    // `to_digit(16)` yields values below 16 only, so narrowing to `u8` is
    // lossless.
    char::from(digit).to_digit(16).map(|value| value as u8)
}
74
75/// Makes sure that `s` is a valid literal suffix.
76pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> {
77    if s.is_empty() {
78        return Ok(());
79    }
80
81    let mut chars = s.chars();
82    let first = chars.next().unwrap();
83    let rest = chars.as_str();
84    if first == '_' && rest.is_empty() {
85        return Err(InvalidSuffix);
86    }
87
88    // This is just an extra check to improve the error message. If the first
89    // character of the "suffix" is already some invalid ASCII
90    // char, "unexpected character" seems like the more fitting error.
91    if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
92        return Err(UnexpectedChar);
93    }
94
95    // Proper check is optional as it's not really necessary in proc macro
96    // context.
97    #[cfg(feature = "check_suffix")]
98    fn is_valid_suffix(first: char, rest: &str) -> bool {
99        use unicode_xid::UnicodeXID;
100
101        (first == '_' || first.is_xid_start())
102            && rest.chars().all(|c| c.is_xid_continue())
103    }
104
105    // When avoiding the dependency on `unicode_xid`, we just do a best effort
106    // to catch the most common errors.
107    #[cfg(not(feature = "check_suffix"))]
108    fn is_valid_suffix(first: char, rest: &str) -> bool {
109        if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
110            return false;
111        }
112        for c in rest.chars() {
113            if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_') {
114                return false;
115            }
116        }
117        true
118    }
119
120    if is_valid_suffix(first, rest) {
121        Ok(())
122    } else {
123        Err(InvalidSuffix)
124    }
125}