pxfm/logs/
fast_log.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::double_double::DoubleDouble;
31use crate::logs::{LOG_COEFFS, LOG_R_DD, LOG_RANGE_REDUCTION};
32use crate::polyeval::f_polyeval4;
33
34#[inline]
35pub(crate) fn simple_fast_log(x: f64) -> f64 {
36    let x_u = x.to_bits();
37
38    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
39
40    let mut x_e: i32 = -(E_BIAS as i32);
41
42    // log2(x) = log2(2^x_e * x_m)
43    //         = x_e + log2(x_m)
44    // Range reduction for log2(x_m):
45    // For each x_m, we would like to find r such that:
46    //   -2^-8 <= r * x_m - 1 < 2^-7
47    let shifted = (x_u >> 45) as i32;
48    let index = shifted & 0x7F;
49    let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);
50
51    // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
52    // all 1's.
53    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
54    let e_x = x_e as f64;
55
56    const LOG_2_HI: f64 = f64::from_bits(0x3fe62e42fefa3800);
57    const LOG_2_LO: f64 = f64::from_bits(0x3d2ef35793c76730);
58
59    let log_r_dd = LOG_R_DD[index as usize];
60
61    // hi is exact
62    let hi = f_fmla(e_x, LOG_2_HI, f64::from_bits(log_r_dd.1));
63    // lo errors ~ e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo) + rounding err
64    //           <= 2 * (e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo))
65    let lo = f_fmla(e_x, LOG_2_LO, f64::from_bits(log_r_dd.0));
66
67    // Set m = 1.mantissa.
68    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
69    let m = f64::from_bits(x_m);
70
71    let u;
72    #[cfg(any(
73        all(
74            any(target_arch = "x86", target_arch = "x86_64"),
75            target_feature = "fma"
76        ),
77        all(target_arch = "aarch64", target_feature = "neon")
78    ))]
79    {
80        u = f_fmla(r, m, -1.0); // exact
81    }
82    #[cfg(not(any(
83        all(
84            any(target_arch = "x86", target_arch = "x86_64"),
85            target_feature = "fma"
86        ),
87        all(target_arch = "aarch64", target_feature = "neon")
88    )))]
89    {
90        use crate::logs::LOG_CD;
91        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
92        let c = f64::from_bits(c_m);
93        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
94    }
95
96    let r1 = DoubleDouble::from_exact_add(hi, u);
97
98    let u_sq = u * u;
99    // Degree-7 minimax polynomial
100    let p0 = f_fmla(
101        u,
102        f64::from_bits(LOG_COEFFS[1]),
103        f64::from_bits(LOG_COEFFS[0]),
104    );
105    let p1 = f_fmla(
106        u,
107        f64::from_bits(LOG_COEFFS[3]),
108        f64::from_bits(LOG_COEFFS[2]),
109    );
110    let p2 = f_fmla(
111        u,
112        f64::from_bits(LOG_COEFFS[5]),
113        f64::from_bits(LOG_COEFFS[4]),
114    );
115    let p = f_polyeval4(u_sq, lo + r1.lo, p0, p1, p2);
116    r1.hi + p
117}