pxfm/logs/fast_log.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::double_double::DoubleDouble;
31use crate::logs::{LOG_COEFFS, LOG_R_DD, LOG_RANGE_REDUCTION};
32use crate::polyeval::f_polyeval4;
33
34#[inline]
35pub(crate) fn simple_fast_log(x: f64) -> f64 {
36 let x_u = x.to_bits();
37
38 const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
39
40 let mut x_e: i32 = -(E_BIAS as i32);
41
42 // log2(x) = log2(2^x_e * x_m)
43 // = x_e + log2(x_m)
44 // Range reduction for log2(x_m):
45 // For each x_m, we would like to find r such that:
46 // -2^-8 <= r * x_m - 1 < 2^-7
47 let shifted = (x_u >> 45) as i32;
48 let index = shifted & 0x7F;
49 let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);
50
51 // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
52 // all 1's.
53 x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
54 let e_x = x_e as f64;
55
56 const LOG_2_HI: f64 = f64::from_bits(0x3fe62e42fefa3800);
57 const LOG_2_LO: f64 = f64::from_bits(0x3d2ef35793c76730);
58
59 let log_r_dd = LOG_R_DD[index as usize];
60
61 // hi is exact
62 let hi = f_fmla(e_x, LOG_2_HI, f64::from_bits(log_r_dd.1));
63 // lo errors ~ e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo) + rounding err
64 // <= 2 * (e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo))
65 let lo = f_fmla(e_x, LOG_2_LO, f64::from_bits(log_r_dd.0));
66
67 // Set m = 1.mantissa.
68 let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
69 let m = f64::from_bits(x_m);
70
71 let u;
72 #[cfg(any(
73 all(
74 any(target_arch = "x86", target_arch = "x86_64"),
75 target_feature = "fma"
76 ),
77 all(target_arch = "aarch64", target_feature = "neon")
78 ))]
79 {
80 u = f_fmla(r, m, -1.0); // exact
81 }
82 #[cfg(not(any(
83 all(
84 any(target_arch = "x86", target_arch = "x86_64"),
85 target_feature = "fma"
86 ),
87 all(target_arch = "aarch64", target_feature = "neon")
88 )))]
89 {
90 use crate::logs::LOG_CD;
91 let c_m = x_m & 0x3FFF_E000_0000_0000u64;
92 let c = f64::from_bits(c_m);
93 u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
94 }
95
96 let r1 = DoubleDouble::from_exact_add(hi, u);
97
98 let u_sq = u * u;
99 // Degree-7 minimax polynomial
100 let p0 = f_fmla(
101 u,
102 f64::from_bits(LOG_COEFFS[1]),
103 f64::from_bits(LOG_COEFFS[0]),
104 );
105 let p1 = f_fmla(
106 u,
107 f64::from_bits(LOG_COEFFS[3]),
108 f64::from_bits(LOG_COEFFS[2]),
109 );
110 let p2 = f_fmla(
111 u,
112 f64::from_bits(LOG_COEFFS[5]),
113 f64::from_bits(LOG_COEFFS[4]),
114 );
115 let p = f_polyeval4(u_sq, lo + r1.lo, p0, p1, p2);
116 r1.hi + p
117}