mesh_loader/utils/float/
lemire.rs

1//! Implementation of the Eisel-Lemire algorithm.
2
3use super::{
4    common::BiasedFp,
5    float::RawFloat,
6    table::{LARGEST_POWER_OF_FIVE, POWER_OF_FIVE_128, SMALLEST_POWER_OF_FIVE},
7};
8
9/// Compute w * 10^q using an extended-precision float representation.
10///
11/// Fast conversion of a the significant digits and decimal exponent
12/// a float to an extended representation with a binary float. This
13/// algorithm will accurately parse the vast majority of cases,
14/// and uses a 128-bit representation (with a fallback 192-bit
15/// representation).
16///
17/// This algorithm scales the exponent by the decimal exponent
18/// using pre-computed powers-of-5, and calculates if the
19/// representation can be unambiguously rounded to the nearest
20/// machine float. Near-halfway cases are not handled here,
21/// and are represented by a negative, biased binary exponent.
22///
23/// The algorithm is described in detail in "Daniel Lemire, Number Parsing
24/// at a Gigabyte per Second" in section 5, "Fast Algorithm", and
25/// section 6, "Exact Numbers And Ties", available online:
26/// <https://arxiv.org/abs/2101.11408.pdf>.
27#[inline(always)]
28pub(crate) fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
29    let fp_zero = BiasedFp::zero_pow2(0);
30    let fp_inf = BiasedFp::zero_pow2(F::INFINITE_POWER);
31    let fp_error = BiasedFp::zero_pow2(-1);
32
33    // Short-circuit if the value can only be a literal 0 or infinity.
34    if w == 0 || q < F::SMALLEST_POWER_OF_TEN as i64 {
35        return fp_zero;
36    } else if q > F::LARGEST_POWER_OF_TEN as i64 {
37        return fp_inf;
38    }
39    // Normalize our significant digits, so the most-significant bit is set.
40    let lz = w.leading_zeros();
41    w <<= lz;
42    let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3);
43    if lo == 0xFFFF_FFFF_FFFF_FFFF {
44        // If we have failed to approximate w x 5^-q with our 128-bit value.
45        // Since the addition of 1 could lead to an overflow which could then
46        // round up over the half-way point, this can lead to improper rounding
47        // of a float.
48        //
49        // However, this can only occur if q ∈ [-27, 55]. The upper bound of q
50        // is 55 because 5^55 < 2^128, however, this can only happen if 5^q > 2^64,
51        // since otherwise the product can be represented in 64-bits, producing
52        // an exact result. For negative exponents, rounding-to-even can
53        // only occur if 5^-q < 2^64.
54        //
55        // For detailed explanations of rounding for negative exponents, see
56        // <https://arxiv.org/pdf/2101.11408.pdf#section.9.1>. For detailed
57        // explanations of rounding for positive exponents, see
58        // <https://arxiv.org/pdf/2101.11408.pdf#section.8>.
59        let inside_safe_exponent = (q >= -27) && (q <= 55);
60        if !inside_safe_exponent {
61            return fp_error;
62        }
63    }
64    let upper_bit = (hi >> 63) as i32;
65    let mut mantissa = hi >> (upper_bit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3);
66    let mut power2 = power(q as i32) + upper_bit - lz as i32 - F::MINIMUM_EXPONENT;
67    if power2 <= 0 {
68        if -power2 + 1 >= 64 {
69            // Have more than 64 bits below the minimum exponent, must be 0.
70            return fp_zero;
71        }
72        // Have a subnormal value.
73        mantissa >>= -power2 + 1;
74        mantissa += mantissa & 1;
75        mantissa >>= 1;
76        power2 = (mantissa >= (1_u64 << F::MANTISSA_EXPLICIT_BITS)) as i32;
77        return BiasedFp {
78            f: mantissa,
79            e: power2,
80        };
81    }
82    // Need to handle rounding ties. Normally, we need to round up,
83    // but if we fall right in between and we have an even basis, we
84    // need to round down.
85    //
86    // This will only occur if:
87    //  1. The lower 64 bits of the 128-bit representation is 0.
88    //      IE, 5^q fits in single 64-bit word.
89    //  2. The least-significant bit prior to truncated mantissa is odd.
90    //  3. All the bits truncated when shifting to mantissa bits + 1 are 0.
91    //
92    // Or, we may fall between two floats: we are exactly halfway.
93    if lo <= 1
94        && q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
95        && q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
96        && mantissa & 3 == 1
97        && (mantissa << (upper_bit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi
98    {
99        // Zero the lowest bit, so we don't round up.
100        mantissa &= !1_u64;
101    }
102    // Round-to-even, then shift the significant digits into place.
103    mantissa += mantissa & 1;
104    mantissa >>= 1;
105    if mantissa >= (2_u64 << F::MANTISSA_EXPLICIT_BITS) {
106        // Rounding up overflowed, so the carry bit is set. Set the
107        // mantissa to 1 (only the implicit, hidden bit is set) and
108        // increase the exponent.
109        mantissa = 1_u64 << F::MANTISSA_EXPLICIT_BITS;
110        power2 += 1;
111    }
112    // Zero out the hidden bit.
113    mantissa &= !(1_u64 << F::MANTISSA_EXPLICIT_BITS);
114    if power2 >= F::INFINITE_POWER {
115        // Exponent is above largest normal value, must be infinite.
116        return fp_inf;
117    }
118    BiasedFp {
119        f: mantissa,
120        e: power2,
121    }
122}
123
124/// Calculate a base 2 exponent from a decimal exponent.
125/// This uses a pre-computed integer approximation for
126/// log2(10), where 217706 / 2^16 is accurate for the
127/// entire range of non-finite decimal exponents.
128#[inline]
129const fn power(q: i32) -> i32 {
130    (q.wrapping_mul(152_170 + 65536) >> 16) + 63
131}
132
133#[inline]
134const fn full_multiplication(a: u64, b: u64) -> (u64, u64) {
135    let r = (a as u128) * (b as u128);
136    (r as u64, (r >> 64) as u64)
137}
138
139// This will compute or rather approximate w * 5**q and return a pair of 64-bit words
140// approximating the result, with the "high" part corresponding to the most significant
141// bits and the low part corresponding to the least significant bits.
142#[inline]
143fn compute_product_approx(q: i64, w: u64, precision: usize) -> (u64, u64) {
144    debug_assert!(q >= SMALLEST_POWER_OF_FIVE as i64);
145    debug_assert!(q <= LARGEST_POWER_OF_FIVE as i64);
146    debug_assert!(precision <= 64);
147
148    let mask = if precision < 64 {
149        0xFFFF_FFFF_FFFF_FFFF_u64 >> precision
150    } else {
151        0xFFFF_FFFF_FFFF_FFFF_u64
152    };
153
154    // 5^q < 2^64, then the multiplication always provides an exact value.
155    // That means whenever we need to round ties to even, we always have
156    // an exact value.
157    let index = (q - SMALLEST_POWER_OF_FIVE as i64) as usize;
158    let (lo5, hi5) = POWER_OF_FIVE_128[index];
159    // Only need one multiplication as long as there is 1 zero but
160    // in the explicit mantissa bits, +1 for the hidden bit, +1 to
161    // determine the rounding direction, +1 for if the computed
162    // product has a leading zero.
163    let (mut first_lo, mut first_hi) = full_multiplication(w, lo5);
164    if first_hi & mask == mask {
165        // Need to do a second multiplication to get better precision
166        // for the lower product. This will always be exact
167        // where q is < 55, since 5^55 < 2^128. If this wraps,
168        // then we need to round up the hi product.
169        let (_, second_hi) = full_multiplication(w, hi5);
170        first_lo = first_lo.wrapping_add(second_hi);
171        if second_hi > first_lo {
172            first_hi += 1;
173        }
174    }
175    (first_lo, first_hi)
176}
mesh_loader/utils/float/lemire.rs

mesh_loader/utils/float/
lemire.rs