pxfm/logs/
log1p_dyadic.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::double_double::DoubleDouble;
30use crate::dyadic_float::{DyadicFloat128, DyadicSign};
31use crate::logs::log1p_dyadic_tables::{LOG1P_R1, LOG1P_R2, LOG1P_S2, LOG1P_S3, LOGP1_R3};
32
33#[cold]
34pub(crate) fn log1p_accurate(e_x: i32, index: usize, m_x: DoubleDouble) -> DyadicFloat128 {
35    // Minimax polynomial generated by Sollya with:
36    // > P = fpminimax((log(1 + x) - x)/x^2, 3, [|1, 128...|],
37    //                 [-0x1.01928p-22 , 0x1p-22]);
38    // > P;
39    // > dirtyinfnorm(log(1 + x)/x - 1 - x*P, [-0x1.01928p-22 , 0x1p-22]);
40    // 0x1.ce1e...p-116
41    const BIG_COEFFS: [DyadicFloat128; 4] = [
42        DyadicFloat128 {
43            sign: DyadicSign::Pos,
44            exponent: -130,
45            mantissa: 0xccccccd7_4818e397_7ed78465_d460315b_u128,
46        },
47        DyadicFloat128 {
48            sign: DyadicSign::Neg,
49            exponent: -129,
50            mantissa: 0x80000000_000478b0_c6388a23_871ce156_u128,
51        },
52        DyadicFloat128 {
53            sign: DyadicSign::Pos,
54            exponent: -129,
55            mantissa: 0xaaaaaaaa_aaaaaaaa_aa807bd8_67763262_u128,
56        },
57        DyadicFloat128 {
58            sign: DyadicSign::Neg,
59            exponent: -128,
60            mantissa: 0x80000000_00000000_00000000_00000000_u128,
61        },
62    ];
63
64    // log(2) with 128-bit precision generated by SageMath with:
65    // def format_hex(value):
66    //     l = hex(value)[2:]
67    //     n = 4
68    //     x = [l[i:i + n] for i in range(0, len(l), n)]
69    //     return "0x" + "_".join(x) + "_u128"
70    // (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent();
71    // print(format_hex(m));
72    const LOG_2: DyadicFloat128 = DyadicFloat128 {
73        sign: DyadicSign::Pos,
74        exponent: -128,
75        mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
76    };
77
78    let e_x_f128 = DyadicFloat128::new_from_f64(e_x as f64);
79    let mut sum = LOG_2 * e_x_f128;
80    sum = sum + LOG1P_R1[index];
81
82    let mut v = DyadicFloat128::new_from_f64(m_x.hi) + DyadicFloat128::new_from_f64(m_x.lo);
83
84    // Skip 2nd range reduction step if |m_x| <= 2^-15.
85    if m_x.hi > f64::from_bits(0x3f00000000000000) || m_x.hi < f64::from_bits(0xbf00000000000000) {
86        // Range reduction - Step 2.
87        // For k such that: k * 2^-14 - 2^-15 <= m_x.hi < k * 2^-14 + 2^-15,
88        // Let s_k = 2^-18 * round( 2^18 / (1 + k*2^-14) ) - 1
89        // Then the 2nd reduced argument is:
90        //    (1 + s_k) * (1 + m_x) - 1 =
91        //  = s_k + m_x + s_k * m_x
92        // Output range:
93        //   -0x1.1037c00000040271p-15 <= v2.hi + v2.lo <= 0x1.108480000008096cp-15
94        let idx2: i32 = (f64::from_bits(0x40d0000000000000)
95            * (m_x.hi
96                + (91. * f64::from_bits(0x3f10000000000000) + f64::from_bits(0x3f00000000000000))))
97            as i32;
98        sum = sum + LOG1P_R2[idx2 as usize];
99        let s2 = DyadicFloat128::new_from_f64(f64::from_bits(LOG1P_S2[idx2 as usize]));
100        v = (v + s2) + (v * s2);
101    }
102
103    // Skip 3rd range reduction step if |v| <= 2^-22.
104    if v.exponent > -150 {
105        // Range reduction - Step 3.
106        // For k such that: k * 2^-21 - 2^-22 <= v2.hi < k * 2^-21 + 2^-22,
107        // Let s_k = 2^-21 * round( 2^21 / (1 + k*2^-21) ) - 1
108        // Then the 3rd reduced argument is:
109        //    v3.hi + v3.lo ~ (1 + s_k) * (1 + v2.hi + v2.lo) - 1
110        // Output range:
111        //   -0x1.012bb800000800114p-22 <= v3.hi + v3.lo <= 0x1p-22
112        let zv = v.fast_as_f64();
113        let idx3 = (f64::from_bits(0x4140000000000000)
114            * (zv
115                + (69. * f64::from_bits(0x3ea0000000000000) + f64::from_bits(0x3e90000000000000))))
116            as i32;
117        sum = sum + LOGP1_R3[idx3 as usize];
118        let s3 = DyadicFloat128::new_from_f64(f64::from_bits(LOG1P_S3[idx3 as usize]));
119        v = (v + s3) + (v * s3);
120    }
121
122    let mut p = v * BIG_COEFFS[0];
123    p = v * (p + BIG_COEFFS[1]);
124    p = v * (p + BIG_COEFFS[2]);
125    p = v * (p + BIG_COEFFS[3]);
126    p = v + (v * p);
127
128    sum + p
129}