mesh_loader/utils/float/
parse.rs

1//! Functions to parse floating-point numbers.
2
3use super::{
4    common::{is_8digits, ByteSlice},
5    float::RawFloat,
6    number::Number,
7};
8
/// Smallest integer that has 19 decimal digits (10^18).
///
/// Used as the accumulation bound in `try_parse_19digits` and as the
/// "integer part already filled the mantissa" threshold in
/// `parse_partial_number`.
9const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
10
/// Convert eight ASCII digits, packed little-endian into a `u64`, to their
/// numeric value.
///
/// The first character of the text sits in the lowest byte of `v`. Every
/// byte is expected to be in `[0x30, 0x39]`; callers check this beforehand
/// (see `try_parse_digits`). Because all bytes are digits, the value can be
/// assembled with three multiplications instead of the usual eight.
///
/// Based on the algorithm described in "Fast numeric string to int":
/// <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
const fn parse_8digits(v: u64) -> u64 {
    const ASCII_ZEROS: u64 = 0x3030_3030_3030_3030;
    // Selects bytes 0 and 4, i.e. every other two-digit pair.
    const PAIR_MASK: u64 = 0x0000_00FF_0000_00FF;
    // (1_000_000 << 32) | 100: weights for the 1st and 3rd digit pairs.
    const WEIGHT_ODD: u64 = 0x000F_4240_0000_0064;
    // (10_000 << 32) | 1: weights for the 2nd and 4th digit pairs.
    const WEIGHT_EVEN: u64 = 0x0000_2710_0000_0001;

    // Turn each ASCII byte into its digit value 0..=9.
    let digits = v - ASCII_ZEROS;
    // Every even byte now holds `10 * d[i] + d[i + 1]`, a two-digit pair.
    // Will not overflow, fits in 63 bits.
    let pairs = (digits * 10) + (digits >> 8);

    // After the multiplications the upper 32 bits of each product hold the
    // weighted sum of two pairs; adding both yields the full 8-digit value.
    let odd = (pairs & PAIR_MASK).wrapping_mul(WEIGHT_ODD);
    let even = ((pairs >> 16) & PAIR_MASK).wrapping_mul(WEIGHT_EVEN);
    ((odd.wrapping_add(even) >> 32) as u32) as u64
}
29
30/// Parse digits until a non-digit character is found.
31#[inline]
32pub(crate) fn try_parse_digits(s: &mut &[u8], x: &mut u64) {
33    // may cause overflows, to be handled later
34    while s.len() >= 8 {
35        let num = s.read_u64le();
36        if is_8digits(num) {
37            *x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(num));
38            *s = &s[8..];
39        } else {
40            break;
41        }
42    }
43
44    *s = s.parse_digits(|digit| {
45        *x = x.wrapping_mul(10).wrapping_add(digit as u64);
46    });
47}
48
49/// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
50fn try_parse_19digits(s: &mut &[u8], x: &mut u64) {
51    while *x < MIN_19DIGIT_INT {
52        // FIXME: Can't use s.split_first() here yet,
53        // see https://github.com/rust-lang/rust/issues/109328
54        // (fixed in LLVM 17)
55        if let [c, s_next @ ..] = s {
56            let digit = c.wrapping_sub(b'0');
57
58            if digit < 10 {
59                *x = (*x * 10) + digit as u64; // no overflows here
60                *s = s_next;
61            } else {
62                break;
63            }
64        } else {
65            break;
66        }
67    }
68}
69
70/// Parse the scientific notation component of a float.
71fn parse_scientific(s: &mut &[u8]) -> Option<i64> {
72    let mut exponent = 0i64;
73    let mut negative = false;
74
75    if let Some((&c, s_next)) = s.split_first() {
76        negative = c == b'-';
77        if c == b'-' || c == b'+' {
78            *s = s_next;
79        }
80    }
81
82    if matches!(s.first(), Some(&x) if x.is_ascii_digit()) {
83        *s = s.parse_digits(|digit| {
84            // no overflows here, saturate well before overflow
85            if exponent < 0x10000 {
86                exponent = 10 * exponent + digit as i64;
87            }
88        });
89        if negative {
90            Some(-exponent)
91        } else {
92            Some(exponent)
93        }
94    } else {
95        None
96    }
97}
98
99/// Parse a partial, non-special floating point number.
100///
101/// This creates a representation of the float as the
102/// significant digits and the decimal exponent.
103#[inline(always)]
104pub(crate) fn parse_partial_number(mut s: &[u8], full_start: &[u8]) -> Option<(Number, usize)> {
105    debug_assert!(!s.is_empty());
106
107    // parse initial digits before dot
108    let mut mantissa = 0_u64;
109    let start = s;
110    try_parse_digits(&mut s, &mut mantissa);
111    let mut n_digits = s.offset_from(start);
112
113    // handle dot with the following digits
114    let mut n_after_dot = 0;
115    let mut exponent = 0_i64;
116    let int_end = s;
117
118    if let Some((&b'.', s_next)) = s.split_first() {
119        s = s_next;
120        let before = s;
121        try_parse_digits(&mut s, &mut mantissa);
122        n_after_dot = s.offset_from(before);
123        exponent = -n_after_dot as i64;
124    }
125
126    n_digits += n_after_dot;
127    if n_digits == 0 {
128        return None;
129    }
130
131    // handle scientific format
132    let mut exp_number = 0_i64;
133    if let Some((&c, s_next)) = s.split_first() {
134        if c == b'e' || c == b'E' {
135            s = s_next;
136            // If None, we have no trailing digits after exponent, or an invalid float.
137            exp_number = parse_scientific(&mut s)?;
138            exponent += exp_number;
139        }
140    }
141
142    let len = s.offset_from(full_start) as usize;
143
144    // handle uncommon case with many digits
145    if n_digits <= 19 {
146        return Some((
147            Number {
148                exponent,
149                mantissa,
150                negative: false,
151                many_digits: false,
152            },
153            len,
154        ));
155    }
156
157    n_digits -= 19;
158    let mut many_digits = false;
159    let mut p = start;
160    while let Some((&c, p_next)) = p.split_first() {
161        if c == b'.' || c == b'0' {
162            n_digits -= c.saturating_sub(b'0' - 1) as isize;
163            p = p_next;
164        } else {
165            break;
166        }
167    }
168    if n_digits > 0 {
169        // at this point we have more than 19 significant digits, let's try again
170        many_digits = true;
171        mantissa = 0;
172        let mut s = start;
173        try_parse_19digits(&mut s, &mut mantissa);
174        exponent = if mantissa >= MIN_19DIGIT_INT {
175            // big int
176            int_end.offset_from(s)
177        } else {
178            s = &s[1..];
179            let before = s;
180            try_parse_19digits(&mut s, &mut mantissa);
181            -s.offset_from(before)
182        } as i64;
183        // add back the explicit part
184        exponent += exp_number;
185    }
186
187    Some((
188        Number {
189            exponent,
190            mantissa,
191            negative: false,
192            many_digits,
193        },
194        len,
195    ))
196}
197
198/// Try to parse a special, non-finite float.
199pub(crate) fn parse_inf_nan<F: RawFloat>(s: &[u8], negative: bool) -> Option<(F, usize)> {
200    // Since a valid string has at most the length 8, we can load
201    // all relevant characters into a u64 and work from there.
202    // This also generates much better code.
203
204    let mut register;
205
206    if s.len() >= 8 {
207        register = s.read_u64le();
208    } else if s.len() >= 3 {
209        let a = s[0] as u64;
210        let b = s[1] as u64;
211        let c = s[2] as u64;
212        register = (c << 16) | (b << 8) | a;
213    } else {
214        return None;
215    }
216
217    // Clear out the bits which turn ASCII uppercase characters into
218    // lowercase characters. The resulting string is all uppercase.
219    // What happens to other characters is irrelevant.
220    register &= 0xDFDFDFDFDFDFDFDF;
221
222    // u64 values corresponding to relevant cases
223    const INF_3: u64 = 0x464E49; // "INF"
224    const INF_8: u64 = 0x5954494E49464E49; // "INFINITY"
225    const NAN: u64 = 0x4E414E; // "NAN"
226
227    // Match register value to constant to parse string.
228    // Also match on the string length to catch edge cases
229    // like "inf\0\0\0\0\0".
230    let (float, len) = match register & 0xFFFFFF {
231        INF_3 => {
232            let len = if register == INF_8 { 8 } else { 3 };
233            (F::INFINITY, len)
234        }
235        NAN => (F::NAN, 3),
236        _ => return None,
237    };
238
239    if negative {
240        Some((-float, len))
241    } else {
242        Some((float, len))
243    }
244}