pxfm/logs/
log1pmx.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::bits::{EXP_MASK, get_exponent_f64};
30use crate::common::f_fmla;
31use crate::double_double::DoubleDouble;
32use crate::logs::log1p::{LOG_R1_DD, R1};
33use crate::logs::log1p_dd;
34use crate::polyeval::{f_estrin_polyeval8, f_polyeval4, f_polyeval5};
35
36#[cold]
37fn tiny_hard(x: f64) -> f64 {
38    // d = [-2^-10; 2^-10];
39    // f_log1pf = (log(1+x) - x)/x^2;
40    // Q = fpminimax(f_log1pf, 7, [|0, 107...|], d);
41    // See ./notes/log1pmx_at_zero_hard.sollya
42
43    let x2 = DoubleDouble::from_exact_mult(x, x);
44
45    const C: [(u64, u64); 7] = [
46        (0x3c755555555129de, 0x3fd5555555555555),
47        (0xbbd333352fe28400, 0xbfd0000000000000),
48        (0xbc698cb3ef1ea2dd, 0x3fc999999999999a),
49        (0x3c4269700b3f95d0, 0xbfc55555555546ef),
50        (0x3c61290e9261823e, 0x3fc2492492491565),
51        (0x3c6fb0a243c2a59c, 0xbfc000018af8cb7e),
52        (0x3c3b271ceb5c60a0, 0x3fbc71ca10b30518),
53    ];
54
55    let mut r = DoubleDouble::mul_f64_add(
56        DoubleDouble::from_bit_pair(C[6]),
57        x,
58        DoubleDouble::from_bit_pair(C[5]),
59    );
60    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[4]));
61    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[3]));
62    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[2]));
63    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[1]));
64    r = DoubleDouble::mul_f64_add(r, x, DoubleDouble::from_bit_pair(C[0]));
65    r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
66    r = DoubleDouble::quick_mult(r, x2);
67    r.to_f64()
68}
69
70fn log1pmx_big(x: f64) -> f64 {
71    let mut x_u = x.to_bits();
72
73    let mut x_dd = DoubleDouble::default();
74
75    let x_exp: u16 = ((x_u >> 52) & 0x7ff) as u16;
76
77    const EXP_BIAS: u16 = (1u16 << (11 - 1u16)) - 1u16;
78
79    if x_exp >= EXP_BIAS {
80        // |x| >= 1
81        if x_u >= 0x4650_0000_0000_0000u64 {
82            x_dd.hi = x;
83        } else {
84            x_dd = DoubleDouble::from_exact_add(x, 1.0);
85        }
86    } else {
87        // |x| < 1
88        x_dd = DoubleDouble::from_exact_add(1.0, x);
89    }
90
91    // At this point, x_dd is the exact sum of 1 + x:
92    //   x_dd.hi + x_dd.lo = x + 1.0 exactly.
93    //   |x_dd.hi| >= 2^-54
94    //   |x_dd.lo| < ulp(x_dd.hi)
95
96    let xhi_bits = x_dd.hi.to_bits();
97    let xhi_frac = xhi_bits & ((1u64 << 52) - 1);
98    x_u = xhi_bits;
99    // Range reduction:
100    // Find k such that |x_hi - k * 2^-7| <= 2^-8.
101    let idx: i32 = ((xhi_frac.wrapping_add(1u64 << (52 - 8))) >> (52 - 7)) as i32;
102    let x_e = (get_exponent_f64(f64::from_bits(xhi_bits)) as i32).wrapping_add(idx >> 7);
103    let e_x = x_e as f64;
104
105    const LOG_2: DoubleDouble = DoubleDouble::new(
106        f64::from_bits(0x3d2ef35793c76730),
107        f64::from_bits(0x3fe62e42fefa3800),
108    );
109
110    // hi is exact
111    // ulp(hi) = ulp(LOG_2_HI) = ulp(LOG_R1_DD[idx].hi) = 2^-43
112
113    let r_dd = LOG_R1_DD[idx as usize];
114
115    let hi = f_fmla(e_x, LOG_2.hi, f64::from_bits(r_dd.1));
116    // lo errors < |e_x| * ulp(LOG_2_LO) + ulp(LOG_R1[idx].lo)
117    //           <= 2^11 * 2^(-43-53) = 2^-85
118    let lo = f_fmla(e_x, LOG_2.lo, f64::from_bits(r_dd.0));
119
120    // Scale x_dd by 2^(-xh_bits.get_exponent()).
121    let s_u: i64 = (x_u & EXP_MASK) as i64 - (EXP_BIAS as i64).wrapping_shl(52);
122    // Normalize arguments:
123    //   1 <= m_dd.hi < 2
124    //   |m_dd.lo| < 2^-52.
125    // This is exact.
126    let m_hi = 1f64.to_bits() | xhi_frac;
127
128    let m_lo = if x_dd.lo.abs() > x_dd.hi * f64::from_bits(0x3800000000000000) {
129        (x_dd.lo.to_bits() as i64).wrapping_sub(s_u)
130    } else {
131        0
132    };
133
134    let m_dd = DoubleDouble::new(f64::from_bits(m_lo as u64), f64::from_bits(m_hi));
135
136    // Perform range reduction:
137    //   r * m - 1 = r * (m_dd.hi + m_dd.lo) - 1
138    //             = (r * m_dd.hi - 1) + r * m_dd.lo
139    //             = v_hi + (v_lo.hi + v_lo.lo)
140    // where:
141    //   v_hi = r * m_dd.hi - 1          (exact)
142    //   v_lo.hi + v_lo.lo = r * m_dd.lo (exact)
143    // Bounds on the values:
144    //   -0x1.69000000000edp-8 < r * m - 1 < 0x1.7f00000000081p-8
145    //   |v_lo.hi| <= |r| * |m_dd.lo| < 2^-52
146    //   |v_lo.lo| < ulp(v_lo.hi) <= 2^(-52 - 53) = 2^(-105)
147    let r = R1[idx as usize];
148    let v_hi;
149    let v_lo = DoubleDouble::from_exact_mult(m_dd.lo, f64::from_bits(r));
150
151    #[cfg(any(
152        all(
153            any(target_arch = "x86", target_arch = "x86_64"),
154            target_feature = "fma"
155        ),
156        all(target_arch = "aarch64", target_feature = "neon")
157    ))]
158    {
159        v_hi = f_fmla(f64::from_bits(r), m_dd.hi, -1.0); // Exact.
160    }
161
162    #[cfg(not(any(
163        all(
164            any(target_arch = "x86", target_arch = "x86_64"),
165            target_feature = "fma"
166        ),
167        all(target_arch = "aarch64", target_feature = "neon")
168    )))]
169    {
170        use crate::logs::log1p::RCM1;
171        let c = f64::from_bits(
172            (idx as u64)
173                .wrapping_shl(52 - 7)
174                .wrapping_add(0x3FF0_0000_0000_0000u64),
175        );
176        v_hi = f_fmla(
177            f64::from_bits(r),
178            m_dd.hi - c,
179            f64::from_bits(RCM1[idx as usize]),
180        ); // Exact
181    }
182
183    // Range reduction output:
184    //   -0x1.69000000000edp-8 < v_hi + v_lo < 0x1.7f00000000081p-8
185    //   |v_dd.lo| < ulp(v_dd.hi) <= 2^(-7 - 53) = 2^-60
186    let mut v_dd = DoubleDouble::from_exact_add(v_hi, v_lo.hi);
187    v_dd.lo += v_lo.lo;
188
189    // Exact sum:
190    //   r1.hi + r1.lo = e_x * log(2)_hi - log(r)_hi + u
191    let mut r1 = DoubleDouble::from_exact_add(hi, v_dd.hi);
192
193    // Degree-7 minimax polynomial log(1 + v) ~ v - v^2 / 2 + ...
194    // generated by Sollya with:
195    // > P = fpminimax(log(1 + x)/x, 6, [|1, 1, D...|],
196    //                 [-0x1.69000000000edp-8, 0x1.7f00000000081p-8]);
197    const P_COEFFS: [u64; 6] = [
198        0xbfe0000000000000,
199        0x3fd5555555555166,
200        0xbfcfffffffdb7746,
201        0x3fc99999a8718a60,
202        0xbfc555874ce8ce22,
203        0x3fc24335555ddbe5,
204    ];
205
206    //   C * ulp(v_sq) + err_hi
207    let v_sq = v_dd.hi * v_dd.hi;
208    let p0 = f_fmla(
209        v_dd.hi,
210        f64::from_bits(P_COEFFS[1]),
211        f64::from_bits(P_COEFFS[0]),
212    );
213    let p1 = f_fmla(
214        v_dd.hi,
215        f64::from_bits(P_COEFFS[3]),
216        f64::from_bits(P_COEFFS[2]),
217    );
218    let p2 = f_fmla(
219        v_dd.hi,
220        f64::from_bits(P_COEFFS[5]),
221        f64::from_bits(P_COEFFS[4]),
222    );
223    let p = f_polyeval4(v_sq, (v_dd.lo + r1.lo) + lo, p0, p1, p2);
224
225    const ERR_HI: [f64; 2] = [f64::from_bits(0x3aa0000000000000), 0.0];
226    let err_hi = ERR_HI[if hi == 0.0 { 1 } else { 0 }];
227
228    let err = f_fmla(v_sq, f64::from_bits(0x3ce0000000000000), err_hi);
229
230    r1.lo = p;
231    r1 = DoubleDouble::from_exact_add(r1.hi, r1.lo);
232    r1 = DoubleDouble::full_add_f64(r1, -x);
233
234    let left = r1.hi + (r1.lo - err);
235    let right = r1.hi + (r1.lo + err);
236    // Ziv's test to see if fast pass is accurate enough.
237    if left == right {
238        return left;
239    }
240    log1pmx_accurate_dd(x)
241}
242
243#[cold]
244fn log1pmx_accurate_dd(x: f64) -> f64 {
245    let r = log1p_dd(x);
246    DoubleDouble::full_add_f64(r, -x).to_f64()
247}
248
249/// Computes log(1+x) - x
250///
251/// ulp 0.5
252pub fn f_log1pmx(x: f64) -> f64 {
253    let ax = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;
254
255    let x_e = (x.to_bits() >> 52) & 0x7ff;
256
257    if !x.is_normal() {
258        if x.is_nan() {
259            return x + x;
260        }
261        if x.is_infinite() {
262            return f64::NAN;
263        }
264        if x == 0. {
265            return x;
266        }
267    }
268
269    if ax > 0x3f90000000000000u64 {
270        // |x| > 2^-6
271        if x <= -1. {
272            if x == -1. {
273                return f64::NEG_INFINITY;
274            }
275            return f64::NAN;
276        }
277        return log1pmx_big(x);
278    }
279
280    const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
281
282    // log(1+x) is expected to be used near zero
283
284    if x_e < E_BIAS - 10 {
285        if x_e < E_BIAS - 100 {
286            // |x| < 2^-100
287            // taylor series log(1+x) - x ~ -x^2/2 + x^3/3
288            let x2 = x * x;
289            let dx2 = f_fmla(x, x, -x2);
290            let rl = dx2 * -0.5;
291            return f_fmla(x2, -0.5, rl);
292        }
293
294        // Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x):
295        // d = [-2^-10; 2^-10];
296        // f_log1pf = (log(1+x) - x)/x^2;
297        // Q = fpminimax(f_log1pf, 7, [|0, 107, 107, D...|], d);
298        // See ./notes/log1pmx_at_zero.sollya
299
300        let p = f_polyeval5(
301            x,
302            f64::from_bits(0x3fc999999999999a),
303            f64::from_bits(0xbfc55555555546ef),
304            f64::from_bits(0x3fc24924923d3abf),
305            f64::from_bits(0xbfc000018af7f637),
306            f64::from_bits(0x3fbc72984db24b6a),
307        );
308
309        let x2 = DoubleDouble::from_exact_mult(x, x);
310
311        let mut r = DoubleDouble::f64_mul_f64_add(
312            x,
313            p,
314            DoubleDouble::from_bit_pair((0xbbd3330899095800, 0xbfd0000000000000)),
315        );
316        r = DoubleDouble::mul_f64_add(
317            r,
318            x,
319            DoubleDouble::from_bit_pair((0x3c75555538509407, 0x3fd5555555555555)),
320        );
321        r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
322        r = DoubleDouble::quick_mult(r, x2);
323        const ERR: f64 = f64::from_bits(0x3af0000000000000); // 2^-80
324        let ub = r.hi + (r.lo + ERR);
325        let lb = r.hi + (r.lo - ERR);
326        if lb == ub {
327            return r.to_f64();
328        }
329        return tiny_hard(x);
330    }
331
332    // Polynomial generated by Sollya in form log(1+x) - x = x^2 * R(x):
333    // d = [-2^-6; 2^-6];
334    // f_log1pf = (log(1+x) - x)/x^2;
335    // Q = fpminimax(f_log1pf, 10, [|0, 107, 107, D...|], d);
336    // See ./notes/log1pmx.sollya
337
338    let p = f_estrin_polyeval8(
339        x,
340        f64::from_bits(0x3fc9999999999997),
341        f64::from_bits(0xbfc5555555555552),
342        f64::from_bits(0x3fc249249249fb64),
343        f64::from_bits(0xbfc000000000f450),
344        f64::from_bits(0x3fbc71c6e2591149),
345        f64::from_bits(0xbfb999995cf14d86),
346        f64::from_bits(0x3fb7494eb6c2c544),
347        f64::from_bits(0xbfb558bf05690e85),
348    );
349
350    let x2 = DoubleDouble::from_exact_mult(x, x);
351
352    let mut r = DoubleDouble::f64_mul_f64_add(
353        x,
354        p,
355        DoubleDouble::from_bit_pair((0xbb9d89dc15a38000, 0xbfd0000000000000)),
356    );
357    r = DoubleDouble::mul_f64_add(
358        r,
359        x,
360        DoubleDouble::from_bit_pair((0x3c7555a15a94e505, 0x3fd5555555555555)),
361    );
362    r = DoubleDouble::mul_f64_add_f64(r, x, f64::from_bits(0xbfe0000000000000));
363    r = DoubleDouble::quick_mult(r, x2);
364
365    r.to_f64()
366}
367
368#[cfg(test)]
369mod tests {
370    use super::*;
371
372    #[test]
373    fn test_log1pmx() {
374        assert_eq!(
375            f_log1pmx(0.00000000000000005076835735015165),
376            -0.0000000000000000000000000000000012887130540163486
377        );
378        assert_eq!(f_log1pmx(5.), -3.208240530771945);
379        assert_eq!(f_log1pmx(-0.99), -3.6151701859880907);
380        assert_eq!(f_log1pmx(-1.), f64::NEG_INFINITY);
381        assert_eq!(
382            f_log1pmx(1.0000000000008708e-13),
383            -0.0000000000000000000000000050000000000083744
384        );
385        assert_eq!(
386            f_log1pmx(1.0000000000008708e-26),
387            -0.00000000000000000000000000000000000000000000000000005000000000008707
388        );
389        assert_eq!(f_log1pmx(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012890176556069385),
390                   -0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000830783258233204);
391    }
392}