pxfm/logs/
log2p1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::bits::{biased_exponent_f64, get_exponent_f64, mantissa_f64};
30use crate::common::{dd_fmla, dyad_fmla, f_fmla};
31use crate::double_double::DoubleDouble;
32use crate::dyadic_float::{DyadicFloat128, DyadicSign};
33use crate::logs::log2p1_dyadic_tables::{LOG2P1_F128_POLY, LOG2P1_INVERSE_2, LOG2P1_LOG_INV_2};
34use crate::logs::log2p1_tables::{LOG2P1_EXACT, LOG2P1_INVERSE, LOG2P1_LOG_DD_INVERSE};
35
36/* put in h+l a double-double approximation of log(z)-z for
37|z| < 0.03125, with absolute error bounded by 2^-67.14
38(see analyze_p1a(-0.03125,0.03125) from log1p.sage) */
39#[inline]
40pub(crate) fn log_p_1a(z: f64) -> DoubleDouble {
41    let z2: DoubleDouble = if z.abs() >= f64::from_bits(0x3000000000000000) {
42        DoubleDouble::from_exact_mult(z, z)
43    } else {
44        // avoid spurious underflow
45        DoubleDouble::default()
46    };
47    let z4h = z2.hi * z2.hi;
48    /* The following is a degree-11 polynomial generated by Sollya
49    approximating log(1+x) for |x| < 0.03125,
50    with absolute error < 2^-73.441 and relative error < 2^-67.088
51    (see file Pabs_a.sollya).
52    The polynomial is P[0]*x + P[1]*x^2 + ... + P[10]*x^11.
53    The algorithm assumes that the degree-1 coefficient P[0] is 1
54    and the degree-2 coefficient P[1] is -0.5. */
55    const PA: [u64; 11] = [
56        0x3ff0000000000000,
57        0xbfe0000000000000,
58        0x3fd5555555555555,
59        0xbfcffffffffffe5f,
60        0x3fc999999999aa82,
61        0xbfc555555583a8c8,
62        0x3fc2492491c359e6,
63        0xbfbffffc728edeea,
64        0x3fbc71c961f34980,
65        0xbfb9a82ac77c05f4,
66        0x3fb74b40dd1707d3,
67    ];
68    let p910 = dd_fmla(f64::from_bits(PA[10]), z, f64::from_bits(PA[9]));
69    let p78 = dd_fmla(f64::from_bits(PA[8]), z, f64::from_bits(PA[7]));
70    let p56 = dd_fmla(f64::from_bits(PA[6]), z, f64::from_bits(PA[5]));
71    let p34 = dd_fmla(f64::from_bits(PA[4]), z, f64::from_bits(PA[3]));
72    let p710 = dd_fmla(p910, z2.hi, p78);
73    let p36 = dd_fmla(p56, z2.hi, p34);
74    let mut ph = dd_fmla(p710, z4h, p36);
75    ph = dd_fmla(ph, z, f64::from_bits(PA[2]));
76    ph *= z2.hi;
77    let mut p = DoubleDouble::from_exact_add(-0.5 * z2.hi, ph * z);
78    p.lo += -0.5 * z2.lo;
79    p
80}
81
82/* put in h+l a double-double approximation of log(z)-z for
83|z| < 0.00212097167968735, with absolute error bounded by 2^-78.25
84(see analyze_p1(-0.00212097167968735,0.00212097167968735)
85from accompanying file log1p.sage, which also yields |l| < 2^-69.99) */
86#[inline]
87fn p_1(z: f64) -> DoubleDouble {
88    const P: [u64; 7] = [
89        0x3ff0000000000000,
90        0xbfe0000000000000,
91        0x3fd5555555555550,
92        0xbfcfffffffff572d,
93        0x3fc999999a2d7868,
94        0xbfc5555c0d31b08e,
95        0x3fc2476b9058e396,
96    ];
97    let z2 = DoubleDouble::from_exact_mult(z, z);
98    let p56 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5]));
99    let p34 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3]));
100    let mut ph = dd_fmla(p56, z2.hi, p34);
101    /* ph approximates P[3]+P[4]*z+P[5]*z^2+P[6]*z^3 */
102    ph = dd_fmla(ph, z, f64::from_bits(P[2]));
103    /* ph approximates P[2]+P[3]*z+P[4]*z^2+P[5]*z^3+P[6]*z^4 */
104    ph *= z2.hi;
105    /* ph approximates P[2]*z^2+P[3]*z^3+P[4]*z^4+P[5]*z^5+P[6]*z^6 */
106    let mut p = DoubleDouble::from_exact_add(-0.5 * z2.hi, ph * z);
107
108    p.lo += -0.5 * z2.lo;
109    p
110}
111
112#[inline]
113pub(crate) fn log_fast(e: i32, v_u: u64) -> DoubleDouble {
114    let m: u64 = 0x10000000000000u64.wrapping_add(v_u & 0xfffffffffffff);
115    /* x = m/2^52 */
116    /* if x > sqrt(2), we divide it by 2 to avoid cancellation */
117    let c: i32 = if m >= 0x16a09e667f3bcd { 1 } else { 0 };
118    let e = e.wrapping_add(c); /* now -1074 <= e <= 1024 */
119    static CY: [f64; 2] = [1.0, 0.5];
120    static CM: [u32; 2] = [43, 44];
121
122    let i: i32 = (m >> CM[c as usize]) as i32;
123    let y = f64::from_bits(v_u) * CY[c as usize];
124    const OFFSET: i32 = 362;
125    let r = f64::from_bits(LOG2P1_INVERSE[(i - OFFSET) as usize]);
126    let log2_inv_dd = LOG2P1_LOG_DD_INVERSE[(i - OFFSET) as usize];
127    let l1 = f64::from_bits(log2_inv_dd.1);
128    let l2 = f64::from_bits(log2_inv_dd.0);
129    let z = dd_fmla(r, y, -1.0); /* exact */
130    /* evaluate P(z), for |z| < 0.00212097167968735 */
131
132    let p = p_1(z);
133
134    /* Add e*log(2) to (h,l), where -1074 <= e <= 1023, thus e has at most
135    11 bits. log2_h is an integer multiple of 2^-42, so that e*log2_h
136    is exact. */
137    const LOG2_H: f64 = f64::from_bits(0x3fe62e42fefa3800);
138    const LOG2_L: f64 = f64::from_bits(0x3d2ef35793c76730);
139
140    let ee = e as f64;
141    let mut vl = DoubleDouble::from_exact_add(f_fmla(ee, LOG2_H, l1), z);
142    vl.lo = p.hi + (vl.lo + (l2 + p.lo));
143
144    vl.lo = dd_fmla(ee, LOG2_L, vl.lo);
145
146    vl
147}
148
149const INV_LOG2_DD: DoubleDouble = DoubleDouble::new(
150    f64::from_bits(0x3c7777d0ffda0d24),
151    f64::from_bits(0x3ff71547652b82fe),
152);
153
154fn log2p1_accurate_small(x: f64) -> f64 {
155    static P_ACC: [u64; 24] = [
156        0x3ff71547652b82fe,
157        0x3c7777d0ffda0d24,
158        0xbfe71547652b82fe,
159        0xbc6777d0ffd9ddb8,
160        0x3fdec709dc3a03fd,
161        0x3c7d27f055481523,
162        0xbfd71547652b82fe,
163        0xbc5777d1456a14c4,
164        0x3fd2776c50ef9bfe,
165        0x3c7e4b2a04f81513,
166        0xbfcec709dc3a03fd,
167        0xbc6d2072e751087a,
168        0x3fca61762a7aded9,
169        0x3c5f90f4895378ac,
170        0xbfc71547652b8301,
171        0x3fc484b13d7c02ae,
172        0xbfc2776c50ef7591,
173        0x3fc0c9a84993cabb,
174        0xbfbec709de7b1612,
175        0x3fbc68f56ba73fd1,
176        0xbfba616c83da87e7,
177        0x3fb89f3042097218,
178        0xbfb72b376930a3fa,
179        0x3fb5d0211d5ab530,
180    ];
181
182    /* for degree 11 or more, ulp(c[d]*x^d) < 2^-105.5*|log2p1(x)|
183    where c[d] is the degree-d coefficient of Pacc, thus we can compute
184    with a double only */
185
186    let mut h = dd_fmla(f64::from_bits(P_ACC[23]), x, f64::from_bits(P_ACC[22])); // degree 16
187    for i in (11..=15).rev() {
188        h = dd_fmla(h, x, f64::from_bits(P_ACC[(i + 6) as usize])); // degree i
189    }
190    let mut l = 0.;
191    for i in (8..=10).rev() {
192        let mut p = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
193        l = p.lo;
194        p = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[(i + 6) as usize]), p.hi);
195        h = p.hi;
196        l += p.lo;
197    }
198    for i in (1..=7).rev() {
199        let mut p = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
200        l = p.lo;
201        p = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[(2 * i - 2) as usize]), p.hi);
202        h = p.hi;
203        l += p.lo + f64::from_bits(P_ACC[(2 * i - 1) as usize]);
204    }
205    let pz = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
206    pz.to_f64()
207}
208
209/* deal with |x| < 2^-900, then log2p1(x) ~ x/log(2) */
210#[cold]
211fn log2p1_accurate_tiny(x: f64) -> f64 {
212    // exceptional values
213    if x.abs() == f64::from_bits(0x0002c316a14459d8) {
214        return if x > 0. {
215            dd_fmla(
216                f64::from_bits(0x1a70000000000000),
217                f64::from_bits(0x1a70000000000000),
218                f64::from_bits(0x0003fc1ce8b1583f),
219            )
220        } else {
221            dd_fmla(
222                f64::from_bits(0x9a70000000000000),
223                f64::from_bits(0x1a70000000000000),
224                f64::from_bits(0x8003fc1ce8b1583f),
225            )
226        };
227    }
228
229    /* first scale x to avoid truncation of l in the underflow region */
230    let sx = x * f64::from_bits(0x4690000000000000);
231    let mut zh = DoubleDouble::quick_f64_mult(sx, INV_LOG2_DD);
232
233    let res = zh.to_f64() * f64::from_bits(0x3950000000000000); // expected result
234    zh.lo += dd_fmla(-res, f64::from_bits(0x4690000000000000), zh.hi);
235    // the correction to apply to res is l*2^-106
236    /* For all rounding modes, we have underflow
237    for |x| <= 0x1.62e42fefa39eep-1023 */
238    dyad_fmla(zh.lo, f64::from_bits(0x3950000000000000), res)
239}
240
241/* Given x > -1, put in (h,l) a double-double approximation of log2(1+x),
242   and return a bound err on the maximal absolute error so that:
243   |h + l - log2(1+x)| < err.
244   We have x = m*2^e with 1 <= m < 2 (m = v.f) and -1074 <= e <= 1023.
245   This routine is adapted from cr_log1p_fast.
246*/
247#[inline]
248fn log2p1_fast(x: f64, e: i32) -> (DoubleDouble, f64) {
249    if e < -5
250    /* e <= -6 thus |x| < 2^-5 */
251    {
252        if e <= -969 {
253            /* then |x| might be as small as 2^-969, thus h=x/log(2) might in the
254            binade [2^-969,2^-968), with ulp(h) = 2^-1021, and if |l| < ulp(h),
255            then l.ulp() might be smaller than 2^-1074. We defer that case to
256            the accurate path. */
257            // *h = *l = 0;
258            // return 1;
259            let ax = x.abs();
260            let result = if ax < f64::from_bits(0x3960000000000000) {
261                log2p1_accurate_tiny(x)
262            } else {
263                log2p1_accurate_small(x)
264            };
265            return (DoubleDouble::new(0.0, result), 0.0);
266        }
267        let mut p = log_p_1a(x);
268        let p_lo = p.lo;
269        p = DoubleDouble::from_exact_add(x, p.hi);
270        p.lo += p_lo;
271        p = DoubleDouble::quick_mult(p, INV_LOG2_DD);
272        return (p, f64::from_bits(0x3c1d400000000000) * p.hi); /* 2^-61.13 < 0x1.d4p-62 */
273    }
274
275    /* (xh,xl) <- 1+x */
276    let zx = DoubleDouble::from_full_exact_add(1.0, x);
277    let mut v_u = zx.hi.to_bits();
278    let e = ((v_u >> 52) as i32).wrapping_sub(0x3ff);
279    v_u = (0x3ffu64 << 52) | (v_u & 0xfffffffffffff);
280    let mut p = log_fast(e, v_u);
281
282    /* log(xh+xl) = log(xh) + log(1+xl/xh) */
283    let c = if zx.hi <= f64::from_bits(0x7fd0000000000000) || zx.lo.abs() >= 4.0 {
284        zx.lo / zx.hi
285    } else {
286        0.
287    }; // avoid spurious underflow
288
289    /* Since |xl| < ulp(xh), we have |xl| < 2^-52 |xh|,
290    thus |c| < 2^-52, and since |log(1+x)-x| < x^2 for |x| < 0.5,
291    we have |log(1+c)-c)| < c^2 < 2^-104. */
292    p.lo += c;
293
294    /* now multiply h+l by 1/log(2) */
295    p = DoubleDouble::quick_mult(p, INV_LOG2_DD);
296
297    (p, f64::from_bits(0x3bb2300000000000)) /* 2^-67.82 < 0x1.23p-68 */
298}
299
300fn log_dyadic_taylor_poly(x: DyadicFloat128) -> DyadicFloat128 {
301    let mut r = LOG2P1_F128_POLY[12];
302    for i in (0..12).rev() {
303        r = x * r + LOG2P1_F128_POLY[i];
304    }
305    r * x
306}
307
308pub(crate) fn log2_dyadic(d: DyadicFloat128, x: f64) -> DyadicFloat128 {
309    let biased_exp = biased_exponent_f64(x);
310    let e = get_exponent_f64(x);
311    let base_mant = mantissa_f64(x);
312    let mant = base_mant + if biased_exp != 0 { 1u64 << 52 } else { 0 };
313    let lead = mant.leading_zeros();
314
315    let kk = e - (if lead > 11 { lead - 12 } else { 0 }) as i64;
316    let mut fe: i16 = kk as i16;
317
318    let adjusted_mant = mant << lead;
319
320    // Find the lookup index
321    let mut i: i16 = (adjusted_mant >> 55) as i16;
322
323    if adjusted_mant > 0xb504f333f9de6484 {
324        fe = fe.wrapping_add(1);
325        i >>= 1;
326    }
327
328    let mut x = d;
329
330    x.exponent = x.exponent.wrapping_sub(fe);
331    let inverse_2 = LOG2P1_INVERSE_2[(i - 128) as usize];
332    let mut z = x * inverse_2;
333
334    const F128_MINUS_ONE: DyadicFloat128 = DyadicFloat128 {
335        sign: DyadicSign::Neg,
336        exponent: -127,
337        mantissa: 0x8000_0000_0000_0000_0000_0000_0000_0000_u128,
338    };
339
340    z = z + F128_MINUS_ONE;
341
342    const LOG2: DyadicFloat128 = DyadicFloat128 {
343        sign: DyadicSign::Pos,
344        exponent: -128,
345        mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
346    };
347
348    // E·log(2)
349    let r = LOG2.mul_int64(fe as i64);
350
351    let mut p = log_dyadic_taylor_poly(z);
352    p = LOG2P1_LOG_INV_2[(i - 128) as usize] + p;
353    p + r
354}
355
356#[cold]
357fn log2p1_accurate(x: f64) -> f64 {
358    let ax = x.abs();
359
360    if ax < f64::from_bits(0x3fa0000000000000) {
361        return if ax < f64::from_bits(0x3960000000000000) {
362            log2p1_accurate_tiny(x)
363        } else {
364            log2p1_accurate_small(x)
365        };
366    }
367    let dx = if x > 1.0 {
368        DoubleDouble::from_exact_add(x, 1.0)
369    } else {
370        DoubleDouble::from_exact_add(1.0, x)
371    };
372    /* log2p1(x) is exact when 1+x = 2^e, thus when 2^e-1 is exactly
373    representable. This can only occur when xl=0 here. */
374    let mut t: u64 = x.to_bits();
375    if dx.lo == 0. {
376        /* check if xh is a power of two */
377        t = dx.hi.to_bits();
378        if (t.wrapping_shl(12)) == 0 {
379            let e = ((t >> 52) as i32).wrapping_sub(0x3ff);
380            return e as f64;
381        }
382    }
383    /* if x=2^e, the accurate path will fail for directed roundings */
384    if (t.wrapping_shl(12)) == 0 {
385        let e: i32 = ((t >> 52) as i32).wrapping_sub(0x3ff); // x = 2^e
386
387        /* for e >= 49, log2p1(x) rounds to e for rounding to nearest;
388        for e >= 48, log2p1(x) rounds to e for rounding toward zero;
389        for e >= 48, log2p1(x) rounds to nextabove(e) for rounding up;
390        for e >= 48, log2p1(x) rounds to e for rounding down. */
391        if e >= 49 {
392            return e as f64 + f64::from_bits(0x3cf0000000000000); // 0x1p-48 = 1/2 ulp(49)
393        }
394    }
395    let x_d = DyadicFloat128::new_from_f64(dx.hi);
396    let mut y = log2_dyadic(x_d, dx.hi);
397    let mut c = DyadicFloat128::from_div_f64(dx.lo, dx.hi);
398    let mut bx = c * c;
399    /* multiply X by -1/2 */
400    bx.exponent -= 1;
401    bx.sign = DyadicSign::Neg;
402    /* C <- C - C^2/2 */
403    c = c + bx;
404    /* |C-log(1+xl/xh)| ~ 2e-64 */
405    y = y + c;
406    const LOG2_INV: DyadicFloat128 = DyadicFloat128 {
407        sign: DyadicSign::Pos,
408        exponent: -115,
409        mantissa: 0xb8aa_3b29_5c17_f0bb_be87_fed0_691d_3e89_u128,
410    };
411    y = y * LOG2_INV;
412    y.exponent -= 12;
413    y.fast_as_f64()
414}
415
416/// Computes log2(x+1)
417///
418/// Max ULP 0.5
419pub fn f_log2p1(x: f64) -> f64 {
420    let x_u = x.to_bits();
421    let e = (((x_u >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff);
422    if e == 0x400 || x == 0. || x <= -1.0 {
423        /* case NaN/Inf, +/-0 or x <= -1 */
424        if e == 0x400 && x.to_bits() != 0xfffu64 << 52 {
425            /* NaN or + Inf*/
426            return x + x;
427        }
428        if x <= -1.0
429        /* we use the fact that NaN < -1 is false */
430        {
431            /* log2p(x<-1) is NaN, log2p(-1) is -Inf and raises DivByZero */
432            return if x < -1.0 {
433                f64::NAN
434            } else {
435                // x=-1
436                f64::NEG_INFINITY
437            };
438        }
439        return x + x; /* +/-0 */
440    }
441
442    /* now x > -1 */
443
444    /* check x=2^n-1 for 0 <= n <= 53, where log2p1(x) is exact,
445    and we shouldn't raise the inexact flag */
446    if 0 <= e && e <= 52 {
447        /* T[e]=2^(e+1)-1, i.e., the unique value of the form 2^n-1
448        in the interval [2^e, 2^(e+1)). */
449        if x == f64::from_bits(LOG2P1_EXACT[e as usize]) {
450            return (e + 1) as f64;
451        }
452    }
453
454    /* For x=2^k-1, -53 <= k <= -1, log2p1(x) = k is also exact. */
455    if e == -1 && x < 0. {
456        // -1 < x <= -1/2
457        let w = (1.0 + x).to_bits(); // 1+x is exact
458        if w.wrapping_shl(12) == 0 {
459            // 1+x = 2^k
460            let k: i32 = ((w >> 52) as i32).wrapping_sub(0x3ff);
461            return k as f64;
462        }
463    }
464
465    /* now x = m*2^e with 1 <= m < 2 (m = v.f) and -1074 <= e <= 1023 */
466    let (p, err) = log2p1_fast(x, e);
467    let left = p.hi + (p.lo - err);
468    let right = p.hi + (p.lo + err);
469    if left == right {
470        return left;
471    }
472    log2p1_accurate(x)
473}
474
475#[cfg(test)]
476mod tests {
477    use super::*;
478    #[test]
479    fn test_log2p1() {
480        assert_eq!(f_log2p1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008344095884546873),
481                   0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012037985753337781);
482        assert_eq!(f_log2p1(0.00006669877554532304), 0.00009622278377734607);
483        assert_eq!(f_log2p1(1.00006669877554532304), 1.0000481121941047);
484        assert_eq!(f_log2p1(-0.90006669877554532304), -3.322890675865049);
485    }
486}