pxfm/logs/
log1p_dd.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::dd_fmla;
30use crate::double_double::DoubleDouble;
31use crate::logs::log_dd::log_poly;
32use crate::logs::log_dd_coeffs::LOG_NEG_DD;
33use crate::polyeval::{f_estrin_polyeval7, f_polyeval3};
34use crate::pow_tables::POW_INVERSE;
35
36#[inline(always)]
37pub(crate) fn log1p_tiny(z: f64) -> DoubleDouble {
38    // See ./notes/log1p_tiny.sollya for generation
39    const Q_1: [(u64, u64); 7] = [
40        (0xbc85555555555555, 0x3fd5555555555556),
41        (0x0000000000000000, 0xbfd0000000000000),
42        (0xbc6999999999999a, 0x3fc999999999999a),
43        (0x3c75555555555555, 0xbfc5555555555556),
44        (0x3c62492492492492, 0x3fc2492492492492),
45        (0x0000000000000000, 0xbfc0000000000000),
46        (0x3c5c71c71c71c71c, 0x3fbc71c71c71c71c),
47    ];
48    let mut r = DoubleDouble::quick_mul_f64_add_f64(
49        DoubleDouble::from_bit_pair(Q_1[6]),
50        z,
51        f64::from_bits(0xbfc0000000000000),
52    );
53    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(Q_1[4]));
54    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(Q_1[3]));
55    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(Q_1[2]));
56    r = DoubleDouble::quick_mul_f64_add_f64(r, z, f64::from_bits(0xbfd0000000000000));
57    r = DoubleDouble::quick_mul_f64_add(r, z, DoubleDouble::from_bit_pair(Q_1[0]));
58    r = DoubleDouble::quick_mul_f64_add_f64(r, z, f64::from_bits(0xbfe0000000000000));
59    r = DoubleDouble::quick_mul_f64_add_f64(r, z, f64::from_bits(0x3ff0000000000000));
60    DoubleDouble::quick_mult_f64(r, z)
61}
62
63#[inline(always)]
64fn log1p_poly_fast(z: f64) -> DoubleDouble {
65    // Polynomial generated by Sollya
66    // d = [-0.0040283203125,0.0040283203125];
67    // f = log(1+x);
68    // p0 = x-x^2/2;
69    // w = 1;
70    // pf = p0+fpminimax(f-p0, [|3,4,5,6,7,8,9|], [|D...|], d, absolute, floating);
71    // See ./notes/dd_log1p_fast.sollya
72    const Q: [f64; 7] = [
73        f64::from_bits(0x3fd5555555555556),
74        f64::from_bits(0xbfcfffffffffffdc),
75        f64::from_bits(0x3fc99999998fd488),
76        f64::from_bits(0xbfc5555555d90fc7),
77        f64::from_bits(0x3fc24936d06d3bf8),
78        f64::from_bits(0xbfbfff46726d8e88),
79        f64::from_bits(0x3fa1847e2faea348),
80    ];
81    let x2 = DoubleDouble::from_exact_mult(z, z);
82    let p = f_estrin_polyeval7(z, Q[0], Q[1], Q[2], Q[3], Q[4], Q[5], Q[6]);
83    let mut t = DoubleDouble::quick_mult_f64(x2, p);
84    t = DoubleDouble::quick_mult_f64(t, z);
85    DoubleDouble::mul_f64_add(x2, -0.5, t)
86}
87
88#[inline(always)]
89fn log1p_tiny_fast(z: f64) -> DoubleDouble {
90    // Polynomial generated by Sollya
91    // d = [-2^-12,2^-12];
92    // f = log(1+x);
93    // p0 = x-x^2/2;
94    // w = 1;
95    // pf = p0+fpminimax(f-p0, [|3,4,5,6|], [|107, D...|], d, absolute, floating);
96    // See ./notes/dd_log1p_tiny_fast.sollya
97    let x2 = DoubleDouble::from_exact_mult(z, z);
98    let p = f_polyeval3(
99        z,
100        f64::from_bits(0xbfcffffffffffff4),
101        f64::from_bits(0x3fc99999b4d2481f),
102        f64::from_bits(0xbfc55555714a3cb8),
103    );
104    let DoubleDouble { hi: h, lo: r } = DoubleDouble::from_exact_mult(z, p);
105    let DoubleDouble { hi: ph, lo: q } = DoubleDouble::add_f64(
106        DoubleDouble::from_bit_pair((0xbc77e8068b994170, 0x3fd5555555555551)),
107        h,
108    );
109    let p = DoubleDouble::new(r + q, ph);
110    let mut t = DoubleDouble::quick_mult(x2, p);
111    t = DoubleDouble::quick_mult_f64(t, z);
112    let f = DoubleDouble::f64_add(z, DoubleDouble::mul_f64_add(x2, -0.5, t));
113    DoubleDouble::from_exact_add(f.hi, f.lo)
114}
115
116#[inline]
117pub(crate) fn log1p_dd(z: f64) -> DoubleDouble {
118    let ax = z.to_bits().wrapping_shl(1);
119    if ax < 0x7e60000000000000u64 {
120        // |x| < 0x1p-12
121        return log1p_tiny(z);
122    }
123    let dz = DoubleDouble::from_full_exact_add(z, 1.0);
124
125    // We'll compute log((z+1)+1) as log(xh+xl) = log(xh) + log(1+xl/xh).
126    // since xl/xh < ulp(xh) we'll use for log(1+xl/xh)
127    // one taylor term what means that log(1+xl/xh) = log_lo + O(x^2)
128
129    let log_lo = if dz.hi <= f64::from_bits(0x7fd0000000000000) || dz.lo.abs() >= 4.0 {
130        dz.lo / dz.hi
131    } else {
132        0.
133    }; // avoid spurious underflow
134
135    let x_u = dz.hi.to_bits();
136    let mut m = x_u & 0xfffffffffffff;
137    let mut e: i64 = ((x_u >> 52) & 0x7ff) as i64;
138
139    let t;
140    if e != 0 {
141        t = m | (0x3ffu64 << 52);
142        m = m.wrapping_add(1u64 << 52);
143        e -= 0x3ff;
144    } else {
145        /* x is a subnormal double  */
146        let k = m.leading_zeros() - 11;
147
148        e = -0x3fei64 - k as i64;
149        m = m.wrapping_shl(k);
150        t = m | (0x3ffu64 << 52);
151    }
152
153    /* now |x| = 2^_e*_t = 2^(_e-52)*m with 1 <= _t < 2,
154    and 2^52 <= _m < 2^53 */
155
156    //   log(x) = log(t) + E · log(2)
157    let mut t = f64::from_bits(t);
158
159    // If m > sqrt(2) we divide it by 2 so ensure 1/sqrt(2) < t < sqrt(2)
160    let c: usize = (m >= 0x16a09e667f3bcd) as usize;
161    static CY: [f64; 2] = [1.0, 0.5];
162    static CM: [u64; 2] = [44, 45];
163
164    e = e.wrapping_add(c as i64);
165    let be = e;
166    let i = m >> CM[c];
167    t *= CY[c];
168
169    let r = f64::from_bits(POW_INVERSE[(i - 181) as usize]);
170    let log_r = DoubleDouble::from_bit_pair(LOG_NEG_DD[(i - 181) as usize]);
171
172    let z = f64::mul_add(r, t, -1.0);
173
174    const LOG2_DD: DoubleDouble = DoubleDouble::new(
175        f64::from_bits(0x3c7abc9e3b39803f),
176        f64::from_bits(0x3fe62e42fefa39ef),
177    );
178
179    let tt = DoubleDouble::mul_f64_add(LOG2_DD, be as f64, log_r);
180
181    let v = DoubleDouble::full_add_f64(tt, z);
182    let mut p = log_poly(z);
183    // adding log(1+xl/xh) lower term
184    p.lo += log_lo;
185    DoubleDouble::f64_add(v.hi, DoubleDouble::new(v.lo + p.lo, p.hi))
186}
187
188#[inline]
189pub(crate) fn log1p_fast_dd(z: f64) -> DoubleDouble {
190    let ax = z.to_bits().wrapping_shl(1);
191    if ax < 0x7e60000000000000u64 {
192        // |x| < 2^-12
193        return log1p_tiny_fast(z);
194    }
195    let dz = DoubleDouble::from_full_exact_add(z, 1.0);
196
197    // We'll compute log((z+1)+1) as log(xh+xl) = log(xh) + log(1+xl/xh).
198    // since xl/xh < ulp(xh) we'll use for log(1+xl/xh)
199    // one taylor term what means that log(1+xl/xh) = log_lo + O(x^2)
200
201    let log_lo = if dz.hi <= f64::from_bits(0x7fd0000000000000) || dz.lo.abs() >= 4.0 {
202        dz.lo / dz.hi
203    } else {
204        0.
205    }; // avoid spurious underflow
206
207    let x_u = dz.hi.to_bits();
208    let mut m = x_u & 0xfffffffffffff;
209    let mut e: i64 = ((x_u >> 52) & 0x7ff) as i64;
210
211    let t;
212    if e != 0 {
213        t = m | (0x3ffu64 << 52);
214        m = m.wrapping_add(1u64 << 52);
215        e -= 0x3ff;
216    } else {
217        /* x is a subnormal double  */
218        let k = m.leading_zeros() - 11;
219
220        e = -0x3fei64 - k as i64;
221        m = m.wrapping_shl(k);
222        t = m | (0x3ffu64 << 52);
223    }
224
225    /* now |x| = 2^_e*_t = 2^(_e-52)*m with 1 <= _t < 2,
226    and 2^52 <= _m < 2^53 */
227
228    //   log(x) = log(t) + E · log(2)
229    let mut t = f64::from_bits(t);
230
231    // If m > sqrt(2) we divide it by 2 so ensure 1/sqrt(2) < t < sqrt(2)
232    let c: usize = (m >= 0x16a09e667f3bcd) as usize;
233    static CY: [f64; 2] = [1.0, 0.5];
234    static CM: [u64; 2] = [44, 45];
235
236    e = e.wrapping_add(c as i64);
237    let be = e;
238    let i = m >> CM[c];
239    t *= CY[c];
240
241    let r = f64::from_bits(POW_INVERSE[(i - 181) as usize]);
242    let log_r = DoubleDouble::from_bit_pair(LOG_NEG_DD[(i - 181) as usize]);
243
244    let z = dd_fmla(r, t, -1.0);
245
246    const LOG2_DD: DoubleDouble = DoubleDouble::new(
247        f64::from_bits(0x3c7abc9e3b39803f),
248        f64::from_bits(0x3fe62e42fefa39ef),
249    );
250
251    let tt = DoubleDouble::mul_f64_add(LOG2_DD, be as f64, log_r);
252
253    let v = DoubleDouble::full_add_f64(tt, z);
254    let mut p = log1p_poly_fast(z);
255    // adding log(1+xl/xh) lower term
256    p.lo += log_lo;
257    DoubleDouble::f64_add(v.hi, DoubleDouble::new(v.lo + p.lo, p.hi))
258}