pxfm/logs/log1p_dyadic.rs
1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1. Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2. Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3. Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::double_double::DoubleDouble;
30use crate::dyadic_float::{DyadicFloat128, DyadicSign};
31use crate::logs::log1p_dyadic_tables::{LOG1P_R1, LOG1P_R2, LOG1P_S2, LOG1P_S3, LOGP1_R3};
32
33#[cold]
34pub(crate) fn log1p_accurate(e_x: i32, index: usize, m_x: DoubleDouble) -> DyadicFloat128 {
35 // Minimax polynomial generated by Sollya with:
36 // > P = fpminimax((log(1 + x) - x)/x^2, 3, [|1, 128...|],
37 // [-0x1.01928p-22 , 0x1p-22]);
38 // > P;
39 // > dirtyinfnorm(log(1 + x)/x - 1 - x*P, [-0x1.01928p-22 , 0x1p-22]);
40 // 0x1.ce1e...p-116
41 const BIG_COEFFS: [DyadicFloat128; 4] = [
42 DyadicFloat128 {
43 sign: DyadicSign::Pos,
44 exponent: -130,
45 mantissa: 0xccccccd7_4818e397_7ed78465_d460315b_u128,
46 },
47 DyadicFloat128 {
48 sign: DyadicSign::Neg,
49 exponent: -129,
50 mantissa: 0x80000000_000478b0_c6388a23_871ce156_u128,
51 },
52 DyadicFloat128 {
53 sign: DyadicSign::Pos,
54 exponent: -129,
55 mantissa: 0xaaaaaaaa_aaaaaaaa_aa807bd8_67763262_u128,
56 },
57 DyadicFloat128 {
58 sign: DyadicSign::Neg,
59 exponent: -128,
60 mantissa: 0x80000000_00000000_00000000_00000000_u128,
61 },
62 ];
63
64 // log(2) with 128-bit precision generated by SageMath with:
65 // def format_hex(value):
66 // l = hex(value)[2:]
67 // n = 4
68 // x = [l[i:i + n] for i in range(0, len(l), n)]
69 // return "0x" + "_".join(x) + "_u128"
70 // (s, m, e) = RealField(128)(2).log().sign_mantissa_exponent();
71 // print(format_hex(m));
72 const LOG_2: DyadicFloat128 = DyadicFloat128 {
73 sign: DyadicSign::Pos,
74 exponent: -128,
75 mantissa: 0xb172_17f7_d1cf_79ab_c9e3_b398_03f2_f6af_u128,
76 };
77
78 let e_x_f128 = DyadicFloat128::new_from_f64(e_x as f64);
79 let mut sum = LOG_2 * e_x_f128;
80 sum = sum + LOG1P_R1[index];
81
82 let mut v = DyadicFloat128::new_from_f64(m_x.hi) + DyadicFloat128::new_from_f64(m_x.lo);
83
84 // Skip 2nd range reduction step if |m_x| <= 2^-15.
85 if m_x.hi > f64::from_bits(0x3f00000000000000) || m_x.hi < f64::from_bits(0xbf00000000000000) {
86 // Range reduction - Step 2.
87 // For k such that: k * 2^-14 - 2^-15 <= m_x.hi < k * 2^-14 + 2^-15,
88 // Let s_k = 2^-18 * round( 2^18 / (1 + k*2^-14) ) - 1
89 // Then the 2nd reduced argument is:
90 // (1 + s_k) * (1 + m_x) - 1 =
91 // = s_k + m_x + s_k * m_x
92 // Output range:
93 // -0x1.1037c00000040271p-15 <= v2.hi + v2.lo <= 0x1.108480000008096cp-15
94 let idx2: i32 = (f64::from_bits(0x40d0000000000000)
95 * (m_x.hi
96 + (91. * f64::from_bits(0x3f10000000000000) + f64::from_bits(0x3f00000000000000))))
97 as i32;
98 sum = sum + LOG1P_R2[idx2 as usize];
99 let s2 = DyadicFloat128::new_from_f64(f64::from_bits(LOG1P_S2[idx2 as usize]));
100 v = (v + s2) + (v * s2);
101 }
102
103 // Skip 3rd range reduction step if |v| <= 2^-22.
104 if v.exponent > -150 {
105 // Range reduction - Step 3.
106 // For k such that: k * 2^-21 - 2^-22 <= v2.hi < k * 2^-21 + 2^-22,
107 // Let s_k = 2^-21 * round( 2^21 / (1 + k*2^-21) ) - 1
108 // Then the 3rd reduced argument is:
109 // v3.hi + v3.lo ~ (1 + s_k) * (1 + v2.hi + v2.lo) - 1
110 // Output range:
111 // -0x1.012bb800000800114p-22 <= v3.hi + v3.lo <= 0x1p-22
112 let zv = v.fast_as_f64();
113 let idx3 = (f64::from_bits(0x4140000000000000)
114 * (zv
115 + (69. * f64::from_bits(0x3ea0000000000000) + f64::from_bits(0x3e90000000000000))))
116 as i32;
117 sum = sum + LOGP1_R3[idx3 as usize];
118 let s3 = DyadicFloat128::new_from_f64(f64::from_bits(LOG1P_S3[idx3 as usize]));
119 v = (v + s3) + (v * s3);
120 }
121
122 let mut p = v * BIG_COEFFS[0];
123 p = v * (p + BIG_COEFFS[1]);
124 p = v * (p + BIG_COEFFS[2]);
125 p = v * (p + BIG_COEFFS[3]);
126 p = v + (v * p);
127
128 sum + p
129}