pxfm/logs/
log.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, fmla, min_normal_f64};
30use crate::double_double::DoubleDouble;
31use crate::dyadic_float::{DyadicFloat128, DyadicSign};
32use crate::logs::log_dyadic::{LOG_STEP_1, LOG_STEP_2, LOG_STEP_3, LOG_STEP_4};
33use crate::logs::log_range_reduction::log_range_reduction;
34use crate::logs::log_td::log_td;
35use crate::logs::log2::LOG_RANGE_REDUCTION;
36use crate::logs::log10::LOG_R_DD;
37use crate::logs::{LOG_COEFFS, log_dd};
38use crate::polyeval::f_polyeval4;
39
40/// Assumes that NaN and infinities, negatives were filtered out
41pub(crate) fn log_dyadic(x: f64) -> DyadicFloat128 {
42    let mut x_u = x.to_bits();
43
44    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
45
46    let mut x_e: i32 = -(E_BIAS as i32);
47
48    const MAX_NORMAL: u64 = f64::to_bits(f64::MAX);
49
50    if x_u == 1f64.to_bits() {
51        // log2(1.0) = +0.0
52        return DyadicFloat128 {
53            sign: DyadicSign::Pos,
54            exponent: 0,
55            mantissa: 0u128,
56        };
57    }
58    if x_u < min_normal_f64().to_bits() || x_u > MAX_NORMAL {
59        // Normalize denormal inputs.
60        x_u = (x * f64::from_bits(0x4330000000000000)).to_bits();
61        x_e -= 52;
62    }
63
64    // Range reduction for log2(x_m):
65    // For each x_m, we would like to find r such that:
66    //   -2^-8 <= r * x_m - 1 < 2^-7
67    let shifted = (x_u >> 45) as i32;
68    let index = shifted & 0x7F;
69    let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);
70
71    // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
72    // all 1's.
73    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
74
75    // Set m = 1.mantissa.
76    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
77    let m = f64::from_bits(x_m);
78
79    let u;
80    #[cfg(any(
81        all(
82            any(target_arch = "x86", target_arch = "x86_64"),
83            target_feature = "fma"
84        ),
85        all(target_arch = "aarch64", target_feature = "neon")
86    ))]
87    {
88        u = f_fmla(r, m, -1.0); // exact
89    }
90    #[cfg(not(any(
91        all(
92            any(target_arch = "x86", target_arch = "x86_64"),
93            target_feature = "fma"
94        ),
95        all(target_arch = "aarch64", target_feature = "neon")
96    )))]
97    {
98        use crate::logs::log2::LOG_CD;
99        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
100        let c = f64::from_bits(c_m);
101        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
102    }
103    log_accurate(x_e, index, u)
104}
105
106// Reuse the output of the fast pass range reduction.
107// -2^-8 <= m_x < 2^-7
108#[cold]
109fn log_accurate(e_x: i32, index: i32, m_x: f64) -> DyadicFloat128 {
110    // > P = fpminimax((log(1 + x) - x)/x^2, 2, [|1, 128...|],
111    //                 [-0x1.0002143p-29 , 0x1p-29]);
112    // > P;
113    // > dirtyinfnorm(log(1 + x)/x - x*P, [-0x1.0002143p-29 , 0x1p-29]);
114    // 0x1.99a3...p-121
115    const BIG_COEFFS: [DyadicFloat128; 3] = [
116        DyadicFloat128 {
117            sign: DyadicSign::Neg,
118            exponent: -129,
119            mantissa: 0x8000_0000_0006_a710_b59c_58e5_554d_581c_u128,
120        },
121        DyadicFloat128 {
122            sign: DyadicSign::Pos,
123            exponent: -129,
124            mantissa: 0xaaaa_aaaa_aaaa_aabd_de05_c7c9_4ae9_cbae_u128,
125        },
126        DyadicFloat128 {
127            sign: DyadicSign::Neg,
128            exponent: -128,
129            mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
130        },
131    ];
132
133    const LOG_2: DyadicFloat128 = DyadicFloat128 {
134        sign: DyadicSign::Pos,
135        exponent: -128,
136        mantissa: 0xb17217f7_d1cf79ab_c9e3b398_03f2f6af_u128,
137    };
138
139    let e_x_f128 = DyadicFloat128::new_from_f64(e_x as f64);
140    let mut sum = LOG_2 * e_x_f128;
141    sum = sum + LOG_STEP_1[index as usize];
142
143    let (v_f128, mut sum) = log_range_reduction(
144        m_x,
145        &[&LOG_STEP_1, &LOG_STEP_2, &LOG_STEP_3, &LOG_STEP_4],
146        sum,
147    );
148
149    sum = sum + v_f128;
150
151    // Polynomial approximation
152    let mut p = v_f128 * BIG_COEFFS[0];
153
154    p = v_f128 * (p + BIG_COEFFS[1]);
155    p = v_f128 * (p + BIG_COEFFS[2]);
156    p = v_f128 * p;
157
158    sum + p
159}
160
161/// Natural logarithm
162///
163/// Max found ULP 0.5
164pub fn f_log(x: f64) -> f64 {
165    let mut x_u = x.to_bits();
166
167    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
168
169    let mut x_e: i32 = -(E_BIAS as i32);
170
171    const MAX_NORMAL: u64 = f64::to_bits(f64::MAX);
172
173    if x_u == 1f64.to_bits() {
174        // log2(1.0) = +0.0
175        return 0.0;
176    }
177    if x_u < min_normal_f64().to_bits() || x_u > MAX_NORMAL {
178        if x == 0.0 {
179            return f64::NEG_INFINITY;
180        }
181        if x < 0. || x.is_nan() {
182            return f64::NAN;
183        }
184        if x.is_infinite() || x.is_nan() {
185            return x + x;
186        }
187        // Normalize denormal inputs.
188        x_u = (x * f64::from_bits(0x4330000000000000)).to_bits();
189        x_e -= 52;
190    }
191
192    // log2(x) = log2(2^x_e * x_m)
193    //         = x_e + log2(x_m)
194    // Range reduction for log2(x_m):
195    // For each x_m, we would like to find r such that:
196    //   -2^-8 <= r * x_m - 1 < 2^-7
197    let shifted = (x_u >> 45) as i32;
198    let index = shifted & 0x7F;
199    let r = f64::from_bits(LOG_RANGE_REDUCTION[index as usize]);
200
201    // Add unbiased exponent. Add an extra 1 if the 8 leading fractional bits are
202    // all 1's.
203    x_e = x_e.wrapping_add(x_u.wrapping_add(1u64 << 45).wrapping_shr(52) as i32);
204    let e_x = x_e as f64;
205
206    const LOG_2_HI: f64 = f64::from_bits(0x3fe62e42fefa3800);
207    const LOG_2_LO: f64 = f64::from_bits(0x3d2ef35793c76730);
208
209    let log_r_dd = LOG_R_DD[index as usize];
210
211    // hi is exact
212    let hi = f_fmla(e_x, LOG_2_HI, f64::from_bits(log_r_dd.1));
213    // lo errors ~ e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo) + rounding err
214    //           <= 2 * (e_x * LSB(LOG_2_LO) + LSB(LOG_R[index].lo))
215    let lo = f_fmla(e_x, LOG_2_LO, f64::from_bits(log_r_dd.0));
216
217    // Set m = 1.mantissa.
218    let x_m = (x_u & 0x000F_FFFF_FFFF_FFFFu64) | 0x3FF0_0000_0000_0000u64;
219    let m = f64::from_bits(x_m);
220
221    let u;
222    #[cfg(any(
223        all(
224            any(target_arch = "x86", target_arch = "x86_64"),
225            target_feature = "fma"
226        ),
227        all(target_arch = "aarch64", target_feature = "neon")
228    ))]
229    {
230        u = f_fmla(r, m, -1.0); // exact
231    }
232    #[cfg(not(any(
233        all(
234            any(target_arch = "x86", target_arch = "x86_64"),
235            target_feature = "fma"
236        ),
237        all(target_arch = "aarch64", target_feature = "neon")
238    )))]
239    {
240        use crate::logs::log2::LOG_CD;
241        let c_m = x_m & 0x3FFF_E000_0000_0000u64;
242        let c = f64::from_bits(c_m);
243        u = f_fmla(r, m - c, f64::from_bits(LOG_CD[index as usize])); // exact
244    }
245
246    let r1 = DoubleDouble::from_exact_add(hi, u);
247
248    let u_sq = u * u;
249
250    // Degree-7 minimax polynomial
251    // Minimax polynomial for (log(1 + x) - x)/x^2, generated by sollya with:
252    // > P = fpminimax((log(1 + x) - x)/x^2, 5, [|D...|], [-2^-8, 2^-7]);
253
254    let p0 = f_fmla(
255        u,
256        f64::from_bits(LOG_COEFFS[1]),
257        f64::from_bits(LOG_COEFFS[0]),
258    );
259    let p1 = f_fmla(
260        u,
261        f64::from_bits(LOG_COEFFS[3]),
262        f64::from_bits(LOG_COEFFS[2]),
263    );
264    let p2 = f_fmla(
265        u,
266        f64::from_bits(LOG_COEFFS[5]),
267        f64::from_bits(LOG_COEFFS[4]),
268    );
269    let p = f_polyeval4(u_sq, lo + r1.lo, p0, p1, p2);
270
271    const HI_ERR: f64 = f64::from_bits(0x3aa0000000000000);
272
273    // Extra errors from P is from using x^2 to reduce evaluation latency.
274    const P_ERR: f64 = f64::from_bits(0x3cd0000000000000);
275
276    // Technicallly error of r1.lo is bounded by:
277    //    hi*ulp(log(2)_lo) + C*ulp(u^2)
278    // To simplify the error computation a bit, we replace |hi|*ulp(log(2)_lo)
279    // with the upper bound: 2^11 * ulp(log(2)_lo) = 2^-85.
280    // Total error is bounded by ~ C * ulp(u^2) + 2^-85.
281    let err = f_fmla(u_sq, P_ERR, HI_ERR);
282
283    // Lower bound from the result
284    let left = r1.hi + (p - err);
285    // Upper bound from the result
286    let right = r1.hi + (p + err);
287
288    // Ziv's test if fast pass is accurate enough.
289    if left == right {
290        return left;
291    }
292
293    log_accurate_slow(x)
294}
295
296#[cold]
297#[inline(never)]
298fn log_accurate_slow(x: f64) -> f64 {
299    let r = log_dd(x);
300    let err = f_fmla(
301        r.hi,
302        f64::from_bits(0x3b50000000000000), // 2^-74
303        f64::from_bits(0x3990000000000000), // 2^-102
304    );
305    let ub = r.hi + (r.lo + err);
306    let lb = r.hi + (r.lo - err);
307    if ub == lb {
308        return r.to_f64();
309    }
310    log_accurate_slow_td(x)
311}
312
313#[cold]
314#[inline(never)]
315fn log_accurate_slow_td(x: f64) -> f64 {
316    log_td(x).to_f64()
317}
318
319/// Log for given value for const context.
320/// This is simplified version just to make a good approximation on const context.
321#[inline]
322pub const fn log(d: f64) -> f64 {
323    const LN_POLY_2_D: f64 = 0.6666666666666762678e+0;
324    const LN_POLY_3_D: f64 = 0.3999999999936908641e+0;
325    const LN_POLY_4_D: f64 = 0.2857142874046159249e+0;
326    const LN_POLY_5_D: f64 = 0.2222219947428228041e+0;
327    const LN_POLY_6_D: f64 = 0.1818349302807168999e+0;
328    const LN_POLY_7_D: f64 = 0.1531633000781658996e+0;
329    const LN_POLY_8_D: f64 = 0.1476969208015536904e+0;
330
331    let e = d.to_bits().wrapping_shr(52).wrapping_sub(0x3ff);
332    if e >= 0x400 || e == 0x00000000fffffc01 {
333        let minf = 0xfffu64 << 52;
334        if e == 0x400 || (e == 0xc00 && d != f64::from_bits(minf)) {
335            /* +Inf or NaN */
336            return d + d;
337        }
338        if d <= 0. {
339            return if d < 0. { f64::NAN } else { f64::NEG_INFINITY };
340        }
341    }
342
343    // reduce into [sqrt(2)/2;sqrt(2)]
344    let mut ui: u64 = d.to_bits();
345    let mut hx = (ui >> 32) as u32;
346    hx = hx.wrapping_add(0x3ff00000 - 0x3fe6a09e);
347    let n = (hx >> 20) as i32 - 0x3ff;
348    hx = (hx & 0x000fffff).wrapping_add(0x3fe6a09e);
349    ui = (hx as u64) << 32 | (ui & 0xffffffff);
350    let a = f64::from_bits(ui);
351
352    let m = a - 1.;
353
354    let x = m / (a + 1.);
355    let x2 = x * x;
356    let f = x2;
357
358    const LN2_H: f64 = 0.6931471805599453;
359    const LN2_L: f64 = 2.3190468138462996e-17;
360
361    let mut u = LN_POLY_8_D;
362    u = fmla(u, f, LN_POLY_7_D);
363    u = fmla(u, f, LN_POLY_6_D);
364    u = fmla(u, f, LN_POLY_5_D);
365    u = fmla(u, f, LN_POLY_4_D);
366    u = fmla(u, f, LN_POLY_3_D);
367    u = fmla(u, f, LN_POLY_2_D);
368    u *= f;
369
370    let t = m * m * 0.5;
371    let r = fmla(x, t, fmla(x, u, LN2_L * n as f64)) - t + m;
372    fmla(LN2_H, n as f64, r)
373}
374
375#[cfg(test)]
376mod tests {
377    use super::*;
378
379    #[test]
380    fn log_test() {
381        assert!(
382            (log(1f64) - 0f64).abs() < 1e-8,
383            "Invalid result {}",
384            log(1f64)
385        );
386        assert!(
387            (log(5f64) - 1.60943791243410037460f64).abs() < 1e-8,
388            "Invalid result {}",
389            log(5f64)
390        );
391    }
392
393    #[test]
394    fn f_log_test() {
395        assert_eq!(f_log(1.99999999779061), 0.693147179455250308807056);
396        assert_eq!(f_log(0.9999999999999999), -1.1102230246251565e-16);
397        assert!(
398            (f_log(1f64) - 0f64).abs() < 1e-8,
399            "Invalid result {}",
400            f_log(1f64)
401        );
402        assert!(
403            (f_log(5f64) - 5f64.ln()).abs() < 1e-8,
404            "Invalid result {}, expected {}",
405            f_log(5f64),
406            5f64.ln()
407        );
408        assert_eq!(
409            f_log(23f64),
410            3.13549421592914969080675283181019611844238031484043574199863537748299324598,
411            "Invalid result {}, expected {}",
412            f_log(23f64),
413            3.13549421592914969080675283181019611844238031484043574199863537748299324598,
414        );
415        assert_eq!(f_log(0.), f64::NEG_INFINITY);
416        assert!(f_log(-1.).is_nan());
417        assert!(f_log(f64::NAN).is_nan());
418        assert!(f_log(f64::NEG_INFINITY).is_nan());
419        assert_eq!(f_log(f64::INFINITY), f64::INFINITY);
420    }
421
422    #[test]
423    fn log_control_values() {
424        assert_eq!(
425            f_log(f64::from_bits(0x3ff1211bef8f68e9)),
426            0.06820362355801819
427        );
428        assert_eq!(
429            f_log(f64::from_bits(0x3ff008000db2e8be)),
430            0.0019512710640270448
431        );
432        assert_eq!(
433            f_log(f64::from_bits(0x3ff10803885617a6)),
434            0.062464334544603616
435        );
436        assert_eq!(
437            f_log(f64::from_bits(0x3ff48ae5a67204f5)),
438            0.24991043470757288
439        );
440        assert_eq!(
441            f_log(f64::from_bits(0x3fedc0b586f2b260)),
442            -0.07281366978582131
443        );
444        assert_eq!(
445            f_log(f64::from_bits(0x3fe490af72a25a81)),
446            -0.44213668842326787
447        );
448        assert_eq!(
449            f_log(f64::from_bits(0x4015b6e7e4e96f86)),
450            1.6916847703128641
451        );
452        assert_eq!(
453            f_log(f64::from_bits(0x3ff0ffc349469a2f)),
454            0.06057012512237759
455        );
456        assert_eq!(
457            f_log(f64::from_bits(0x3fe69e7aa6da2df5)),
458            -0.3469430325599064
459        );
460        assert_eq!(
461            f_log(f64::from_bits(0x3fe5556123e8a2b0)),
462            -0.4054566631657151
463        );
464    }
465}