pxfm/logs/
log10p1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::*;
30use crate::double_double::DoubleDouble;
31use crate::dyadic_float::{DyadicFloat128, DyadicSign};
32use crate::logs::log2p1::{log_fast, log_p_1a, log2_dyadic};
33use crate::logs::log10p1_tables::{LOG10P1_EXACT_INT_S_TABLE, LOG10P1_EXACT_INT_TABLE};
34
35const INV_LOG10_DD: DoubleDouble = DoubleDouble::new(
36    f64::from_bits(0x3c695355baaafad3),
37    f64::from_bits(0x3fdbcb7b1526e50e),
38);
39
40/* deal with |x| < 2^-900, then log10p1(x) ~ x/log(10) */
41#[cold]
42fn log10p1_accurate_tiny(x: f64) -> f64 {
43    /* first scale x to avoid truncation of l in the underflow region */
44    let sx = x * f64::from_bits(0x4690000000000000);
45    let mut px = DoubleDouble::quick_f64_mult(sx, INV_LOG10_DD);
46
47    let res = px.to_f64() * f64::from_bits(0x3950000000000000); // expected result
48    px.lo += dd_fmla(-res, f64::from_bits(0x4690000000000000), px.hi);
49    // the correction to apply to res is l*2^-106
50    /* For RNDN, we have underflow for |x| <= 0x1.26bb1bbb55515p-1021,
51    and for rounding away, for |x| < 0x1.26bb1bbb55515p-1021. */
52
53    dyad_fmla(px.lo, f64::from_bits(0x3950000000000000), res)
54}
55
56fn log10p1_accurate_small(x: f64) -> f64 {
57    /* the following is a degree-17 polynomial approximating log10p1(x) for
58    |x| <= 2^-5 with relative error < 2^-105.067*/
59
60    static P_ACC: [u64; 25] = [
61        0x3fdbcb7b1526e50e,
62        0x3c695355baaafad4,
63        0xbfcbcb7b1526e50e,
64        0xbc595355baaae078,
65        0x3fc287a7636f435f,
66        0xbc59c871838f83ac,
67        0xbfbbcb7b1526e50e,
68        0xbc495355e23285f2,
69        0x3fb63c62775250d8,
70        0x3c4442abd5831422,
71        0xbfb287a7636f435f,
72        0x3c49d116f225c4e4,
73        0x3fafc3fa615105c7,
74        0x3c24e1d7b4790510,
75        0xbfabcb7b1526e512,
76        0x3c49f884199ab0ce,
77        0x3fa8b4df2f3f0486,
78        0xbfa63c6277522391,
79        0x3fa436e526a79e5c,
80        0xbfa287a764c5a762,
81        0x3fa11ac1e784daec,
82        0xbf9fc3eedc920817,
83        0x3f9da5cac3522edb,
84        0xbf9be5ca1f9a97cd,
85        0x3f9a44b64ca06e9b,
86    ];
87
88    /* for degree 11 or more, ulp(c[d]*x^d) < 2^-105.7*|log10p1(x)|
89    where c[d] is the degree-d coefficient of Pacc, thus we can compute
90    with a double only, and even with degree 10 (this does not increase
91    the number of exceptional cases) */
92
93    let mut h = dd_fmla(f64::from_bits(P_ACC[24]), x, f64::from_bits(P_ACC[23])); // degree 16
94    for i in (10..=15).rev() {
95        h = dd_fmla(h, x, f64::from_bits(P_ACC[(i + 7) as usize])); // degree i
96    }
97
98    // degree 9
99    let px = DoubleDouble::from_exact_mult(x, h);
100    let hl = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[9 + 7]), px.hi);
101    h = hl.hi;
102    let mut l = px.lo + hl.lo;
103
104    for i in (1..=8).rev() {
105        let mut p = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
106        l = p.lo;
107        p = DoubleDouble::from_exact_add(f64::from_bits(P_ACC[(2 * i - 2) as usize]), p.hi);
108        h = p.hi;
109        l += p.lo + f64::from_bits(P_ACC[(2 * i - 1) as usize]);
110    }
111    let pz = DoubleDouble::quick_f64_mult(x, DoubleDouble::new(l, h));
112    pz.to_f64()
113}
114
115#[cold]
116fn log10p1_accurate(x: f64) -> f64 {
117    let ax = x.abs();
118
119    if ax < f64::from_bits(0x3fa0000000000000) {
120        return if ax < f64::from_bits(0x07b0000000000000) {
121            log10p1_accurate_tiny(x)
122        } else {
123            log10p1_accurate_small(x)
124        };
125    }
126    let dx = if x > 1.0 {
127        DoubleDouble::from_exact_add(x, 1.0)
128    } else {
129        DoubleDouble::from_exact_add(1.0, x)
130    };
131    let x_d = DyadicFloat128::new_from_f64(dx.hi);
132    let mut y = log2_dyadic(x_d, dx.hi);
133    let mut c = DyadicFloat128::from_div_f64(dx.lo, dx.hi);
134    let mut bx = c * c;
135    /* multiply X by -1/2 */
136    bx.exponent -= 1;
137    bx.sign = DyadicSign::Neg;
138    /* C <- C - C^2/2 */
139    c = c + bx;
140    /* |C-log(1+xl/xh)| ~ 2e-64 */
141    y = y + c;
142
143    // Sage Math:
144    // from sage.all import *
145    //
146    // def format_hex2(value):
147    //     l = hex(value)[2:]
148    //     n = 4
149    //     x = [l[i:i + n] for i in range(0, len(l), n)]
150    //     return "0x" + "_".join(x) + "_u128"
151    // (s, m, e) = (RealField(128)(1)/RealField(128)(10)).log().sign_mantissa_exponent();
152    // print(format_hex2(m));
153    const LOG10_INV: DyadicFloat128 = DyadicFloat128 {
154        sign: DyadicSign::Pos,
155        exponent: -129,
156        mantissa: 0xde5b_d8a9_3728_7195_355b_aaaf_ad33_dc32_u128,
157    };
158    y = y * LOG10_INV;
159    y.fast_as_f64()
160}
161
162#[inline]
163fn log10p1_fast(x: f64, e: i32) -> (DoubleDouble, f64) {
164    if e < -5
165    /* e <= -6 thus |x| < 2^-5 */
166    {
167        if e <= -968 {
168            /* then |x| might be as small as 2^-968, thus h=x/log(10) might in the
169            binade [2^-970,2^-969), with ulp(h) = 2^-1022, and if |l| < ulp(h),
170            then l.ulp() might be smaller than 2^-1074. We defer that case to
171            the accurate path. */
172            let ax = x.abs();
173            let result = if ax < f64::from_bits(0x07b0000000000000) {
174                log10p1_accurate_tiny(x)
175            } else {
176                log10p1_accurate_small(x)
177            };
178            return (DoubleDouble::new(0.0, result), 0.0);
179        }
180        let mut p = log_p_1a(x);
181        let p_lo = p.lo;
182        p = DoubleDouble::from_exact_add(x, p.hi);
183        p.lo += p_lo;
184        p = DoubleDouble::quick_mult(p, INV_LOG10_DD);
185        return (p, f64::from_bits(0x3c1d400000000000) * p.hi); /* 2^-61.13 < 0x1.d4p-62 */
186    }
187
188    /* (xh,xl) <- 1+x */
189    let zx = DoubleDouble::from_full_exact_add(x, 1.0);
190
191    let mut v_u = zx.hi.to_bits();
192    let e = ((v_u >> 52) as i32).wrapping_sub(0x3ff);
193    v_u = (0x3ffu64 << 52) | (v_u & 0xfffffffffffff);
194    let mut p = log_fast(e, v_u);
195
196    /* log(xh+xl) = log(xh) + log(1+xl/xh) */
197    let c = if zx.hi <= f64::from_bits(0x7fd0000000000000) || zx.lo.abs() >= 4.0 {
198        zx.lo / zx.hi
199    } else {
200        0.
201    }; // avoid spurious underflow
202
203    /* Since |xl| < ulp(xh), we have |xl| < 2^-52 |xh|,
204    thus |c| < 2^-52, and since |log(1+x)-x| < x^2 for |x| < 0.5,
205    we have |log(1+c)-c)| < c^2 < 2^-104. */
206    p.lo += c;
207
208    /* now multiply h+l by 1/log(2) */
209    p = DoubleDouble::quick_mult(p, INV_LOG10_DD);
210    (p, f64::from_bits(0x3bb0a00000000000)) /* 2^-67.92 < 0x1.0ap-68 */
211}
212
213/// Computes log10(x+1)
214///
215/// Max ULP 0.5
216pub fn f_log10p1(x: f64) -> f64 {
217    let x_u = x.to_bits();
218    let e = (((x_u >> 52) & 0x7ff) as i32).wrapping_sub(0x3ff);
219    if e == 0x400 || x == 0. || x <= -1.0 {
220        /* case NaN/Inf, +/-0 or x <= -1 */
221        if e == 0x400 && x.to_bits() != 0xfffu64 << 52 {
222            /* NaN or + Inf*/
223            return x + x;
224        }
225        if x <= -1.0
226        /* we use the fact that NaN < -1 is false */
227        {
228            /* log2p(x<-1) is NaN, log2p(-1) is -Inf and raises DivByZero */
229            return if x < -1.0 {
230                f64::NAN
231            } else {
232                // x=-1
233                f64::NEG_INFINITY
234            };
235        }
236        return x + x; /* +/-0 */
237    }
238
239    /* check x=10^n-1 for 1 <= n <= 15, where log10p1(x) is exact,
240    and we shouldn't raise the inexact flag */
241    if 3 <= e && e <= 49 && x == f64::from_bits(LOG10P1_EXACT_INT_TABLE[e as usize]) {
242        return LOG10P1_EXACT_INT_S_TABLE[e as usize] as f64;
243    }
244
245    /* now x = m*2^e with 1 <= m < 2 (m = v.f) and -1074 <= e <= 1023 */
246    let (p, err) = log10p1_fast(x, e);
247    let left = p.hi + (p.lo - err);
248    let right = p.hi + (p.lo + err);
249    if left == right {
250        return left;
251    }
252
253    log10p1_accurate(x)
254}
255
256#[cfg(test)]
257mod tests {
258    use super::*;
259
260    #[test]
261    fn test_log10p1() {
262        assert_eq!(f_log10p1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000013904929147106097),
263                   0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006038833999843867 );
264        assert!(f_log10p1(-2.0).is_nan());
265        assert_eq!(f_log10p1(9.0), 1.0);
266        assert_eq!(f_log10p1(2.0), 0.47712125471966244);
267        assert_eq!(f_log10p1(-0.5), -0.3010299956639812);
268    }
269}