litrs/
parse.rs

1use crate::{
2    err::{
3        perr,
4        ParseErrorKind::{self, *},
5    },
6    BoolLit, Buffer, ByteLit, ByteStringLit, CStringLit, CharLit, FloatLit, IntegerLit, Literal,
7    ParseError, StringLit,
8};
9
10
11pub fn parse<B: Buffer>(input: B) -> Result<Literal<B>, ParseError> {
12    let (first, rest) = input.as_bytes().split_first().ok_or(perr(None, Empty))?;
13    let second = input.as_bytes().get(1).copied();
14
15    match first {
16        b'f' if &*input == "false" => Ok(Literal::Bool(BoolLit::False)),
17        b't' if &*input == "true" => Ok(Literal::Bool(BoolLit::True)),
18
19        // A number literal (integer or float).
20        b'0'..=b'9' => {
21            // To figure out whether this is a float or integer, we do some
22            // quick inspection here. Yes, this is technically duplicate
23            // work with what is happening in the integer/float parse
24            // methods, but it makes the code way easier for now and won't
25            // be a huge performance loss.
26            //
27            // The first non-decimal char in a float literal must
28            // be '.', 'e' or 'E'.
29            match input.as_bytes().get(1 + end_dec_digits(rest)) {
30                Some(b'.') | Some(b'e') | Some(b'E') => FloatLit::parse(input).map(Literal::Float),
31
32                _ => IntegerLit::parse(input).map(Literal::Integer),
33            }
34        }
35
36        b'\'' => CharLit::parse(input).map(Literal::Char),
37        b'"' | b'r' => StringLit::parse(input).map(Literal::String),
38
39        b'b' if second == Some(b'\'') => ByteLit::parse(input).map(Literal::Byte),
40        b'b' if second == Some(b'r') || second == Some(b'"') => {
41            ByteStringLit::parse(input).map(Literal::ByteString)
42        }
43
44        b'c' => CStringLit::parse(input).map(Literal::CString),
45
46        _ => Err(perr(None, InvalidLiteral)),
47    }
48}
49
50
51pub(crate) fn first_byte_or_empty(s: &str) -> Result<u8, ParseError> {
52    s.as_bytes().first().copied().ok_or(perr(None, Empty))
53}
54
/// Returns the length of the leading run of decimal digits and underscores in
/// `input`, i.e. the index of the first byte that is neither `_` nor
/// `0`..=`9`. If every byte qualifies, this equals `input.len()`.
pub(crate) fn end_dec_digits(input: &[u8]) -> usize {
    input
        .iter()
        .take_while(|&&byte| byte == b'_' || byte.is_ascii_digit())
        .count()
}
62
/// Converts an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric
/// value in `0..=15`. Any other byte yields `None`.
pub(crate) fn hex_digit_value(digit: u8) -> Option<u8> {
    // Pick the byte that maps to value 0 within the matching range, then
    // subtract once at the end.
    let zero_point = match digit {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(digit - zero_point)
}
71
72/// Makes sure that `s` is a valid literal suffix.
73pub(crate) fn check_suffix(s: &str) -> Result<(), ParseErrorKind> {
74    if s.is_empty() {
75        return Ok(());
76    }
77
78    let mut chars = s.chars();
79    let first = chars.next().unwrap();
80    let rest = chars.as_str();
81    if first == '_' && rest.is_empty() {
82        return Err(InvalidSuffix);
83    }
84
85    // This is just an extra check to improve the error message. If the first
86    // character of the "suffix" is already some invalid ASCII
87    // char, "unexpected character" seems like the more fitting error.
88    if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
89        return Err(UnexpectedChar);
90    }
91
92    // Proper check is optional as it's not really necessary in proc macro
93    // context.
94    #[cfg(feature = "check_suffix")]
95    fn is_valid_suffix(first: char, rest: &str) -> bool {
96        use unicode_xid::UnicodeXID;
97
98        (first == '_' || first.is_xid_start())
99            && rest.chars().all(|c| c.is_xid_continue())
100    }
101
102    // When avoiding the dependency on `unicode_xid`, we just do a best effort
103    // to catch the most common errors.
104    #[cfg(not(feature = "check_suffix"))]
105    fn is_valid_suffix(first: char, rest: &str) -> bool {
106        if first.is_ascii() && !(first.is_ascii_alphabetic() || first == '_') {
107            return false;
108        }
109        for c in rest.chars() {
110            if c.is_ascii() && !(c.is_ascii_alphanumeric() || c == '_') {
111                return false;
112            }
113        }
114        true
115    }
116
117    if is_valid_suffix(first, rest) {
118        Ok(())
119    } else {
120        Err(InvalidSuffix)
121    }
122}