pxfm/exponents/
exp10m1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::exponents::exp2m1::{EXP_M1_2_TABLE1, EXP_M1_2_TABLE2};
32use crate::rounding::CpuFloor;
33use crate::rounding::CpuRoundTiesEven;
34
35const LN10H: f64 = f64::from_bits(0x40026bb1bbb55516);
36const LN10L: f64 = f64::from_bits(0xbcaf48ad494ea3e9);
37
38struct Exp10m1 {
39    exp: DoubleDouble,
40    err: f64,
41}
42
43// Approximation for the fast path of exp(z) for z=zh+zl,
44// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
45// (assuming x^y does not overflow or underflow)
46#[inline]
47fn q_1(dz: DoubleDouble) -> DoubleDouble {
48    const Q_1: [u64; 5] = [
49        0x3ff0000000000000,
50        0x3ff0000000000000,
51        0x3fe0000000000000,
52        0x3fc5555555995d37,
53        0x3fa55555558489dc,
54    ];
55    let z = dz.to_f64();
56    let mut q = f_fmla(f64::from_bits(Q_1[4]), dz.hi, f64::from_bits(Q_1[3]));
57    q = f_fmla(q, z, f64::from_bits(Q_1[2]));
58
59    let mut p0 = DoubleDouble::from_exact_add(f64::from_bits(Q_1[1]), q * z);
60    p0 = DoubleDouble::quick_mult(dz, p0);
61    p0 = DoubleDouble::f64_add(f64::from_bits(Q_1[0]), p0);
62    p0
63}
64
65#[inline]
66fn exp1(x: DoubleDouble) -> DoubleDouble {
67    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */
68    let k = (x.hi * INVLOG2).cpu_round_ties_even();
69
70    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
71    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
72    let mut zk = DoubleDouble::from_exact_mult(LOG2H, k);
73    zk.lo = f_fmla(k, LOG2L, zk.lo);
74
75    let mut yz = DoubleDouble::from_exact_add(x.hi - zk.hi, x.lo);
76    yz.lo -= zk.lo;
77
78    let ik: i64 = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
79    let im: i64 = (ik >> 12).wrapping_add(0x3ff);
80    let i2: i64 = (ik >> 6) & 0x3f;
81    let i1: i64 = ik & 0x3f;
82
83    let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
84    let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
85
86    let p0 = DoubleDouble::quick_mult(t2, t1);
87
88    let mut q = q_1(yz);
89    q = DoubleDouble::quick_mult(p0, q);
90
91    /* Scale by 2^k. Warning: for x near 1024, we can have k=2^22, thus
92    M = 2047, which encodes Inf */
93    let mut du = (im as u64).wrapping_shl(52);
94    if im == 0x7ff {
95        q.hi *= 2.0;
96        q.lo *= 2.0;
97        du = (im.wrapping_sub(1) as u64).wrapping_shl(52);
98    }
99    q.hi *= f64::from_bits(du);
100    q.lo *= f64::from_bits(du);
101    q
102}
103
104#[inline]
105fn exp10m1_fast(x: f64, tiny: bool) -> Exp10m1 {
106    if tiny {
107        return exp10m1_fast_tiny(x);
108    }
109    /* now -54 < x < -0.125 or 0.125 < x < 1024: we approximate exp(x*log(2))
110    and subtract 1 */
111    let v = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
112    /*
113    The a_mul() call is exact, and the error of the fma() is bounded by
114     ulp(l).
115     We have |t| <= ulp(h) <= ulp(LN2H*1024) = 2^-43,
116     |t+x*LN2L| <= 2^-43 * 1024*LN2L < 2^-42.7,
117     thus |l| <= |t| + |x*LN2L| + ulp(t+x*LN2L)
118              <= 2^-42.7 + 2^-95 <= 2^-42.6, and ulp(l) <= 2^-95.
119     Thus:
120     |h + l - x*log(2)| <= |h + l - x*(LN2H+LN2L)| + |x|*|LN2H+LN2L-log(2)|
121                        <= 2^-95 + 1024*2^-110.4 < 2^-94.9 */
122
123    let mut p = exp1(v);
124
125    let zf: DoubleDouble = if x >= 0. {
126        // implies h >= 1 and the fast_two_sum pre-condition holds
127        DoubleDouble::from_exact_add(p.hi, -1.0)
128    } else {
129        DoubleDouble::from_exact_add(-1.0, p.hi)
130    };
131    p.lo += zf.lo;
132    p.hi = zf.hi;
133    /* The error in the above fast_two_sum is bounded by 2^-105*|h|,
134    with the new value of h, thus the total absolute error is bounded
135    by eps1*|h_in|+2^-105*|h|.
136    Relatively to h this yields eps1*|h_in/h| + 2^-105, where the maximum
137    of |h_in/h| is obtained for x near -0.125, with |2^x/(2^x-1)| < 11.05.
138    We get a relative error bound of 2^-74.138*11.05 + 2^-105 < 2^-70.67. */
139    Exp10m1 {
140        exp: p,
141        err: f64::from_bits(0x3b77a00000000000) * p.hi, /* 2^-70.67 < 0x1.42p-71 */
142    }
143}
144
145// Approximation for the accurate path of exp(z) for z=zh+zl,
146// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
147// (assuming x^y does not overflow or underflow)
148#[inline]
149fn q_2(dz: DoubleDouble) -> DoubleDouble {
150    /* Let q[0]..q[7] be the coefficients of degree 0..7 of Q_2.
151    The ulp of q[7]*z^7 is at most 2^-155, thus we can compute q[7]*z^7
152    in double precision only.
153    The ulp of q[6]*z^6 is at most 2^-139, thus we can compute q[6]*z^6
154    in double precision only.
155    The ulp of q[5]*z^5 is at most 2^-124, thus we can compute q[5]*z^5
156    in double precision only. */
157
158    /* The following is a degree-7 polynomial generated by Sollya for exp(z)
159    over [-0.000130273,0.000130273] with absolute error < 2^-113.218
160    (see file exp_accurate.sollya). Since we use this code only for
161    |x| > 0.125 in exp2m1(x), the corresponding relative error for exp2m1
162    is about 2^-113.218/|exp2m1(-0.125)| which is about 2^-110. */
163    const Q_2: [u64; 9] = [
164        0x3ff0000000000000,
165        0x3ff0000000000000,
166        0x3fe0000000000000,
167        0x3fc5555555555555,
168        0x3c655555555c4d26,
169        0x3fa5555555555555,
170        0x3f81111111111111,
171        0x3f56c16c3fbb4213,
172        0x3f2a01a023ede0d7,
173    ];
174
175    let z = dz.to_f64();
176    let mut q = dd_fmla(f64::from_bits(Q_2[8]), dz.hi, f64::from_bits(Q_2[7]));
177    q = dd_fmla(q, z, f64::from_bits(Q_2[6]));
178    q = dd_fmla(q, z, f64::from_bits(Q_2[5]));
179
180    // multiply q by z and add Q_2[3] + Q_2[4]
181
182    let mut p = DoubleDouble::from_exact_mult(q, z);
183    let r0 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[3]), p.hi);
184    p.hi = r0.hi;
185    p.lo += r0.lo + f64::from_bits(Q_2[4]);
186
187    // multiply hi+lo by zh+zl and add Q_2[2]
188
189    p = DoubleDouble::quick_mult(p, dz);
190    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[2]), p.hi);
191    p.hi = r1.hi;
192    p.lo += r1.lo;
193
194    // multiply hi+lo by zh+zl and add Q_2[1]
195    p = DoubleDouble::quick_mult(p, dz);
196    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[1]), p.hi);
197    p.hi = r1.hi;
198    p.lo += r1.lo;
199
200    // multiply hi+lo by zh+zl and add Q_2[0]
201    p = DoubleDouble::quick_mult(p, dz);
202    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[0]), p.hi);
203    p.hi = r1.hi;
204    p.lo += r1.lo;
205    p
206}
207
208// returns a double-double approximation hi+lo of exp(x*log(10))
209// assumes -0x1.041704c068efp+4 < x <= 0x1.34413509f79fep+8
210#[inline]
211fn exp_2(x: f64) -> DoubleDouble {
212    let mut k = (x * f64::from_bits(0x40ca934f0979a371)).cpu_round_ties_even();
213    if k == 4194304. {
214        k = 4194303.; // ensures M < 2047 below
215    }
216    // since |x| <= 745 we have k <= 3051520
217
218    const LOG2_10H: f64 = f64::from_bits(0x3f134413509f79ff);
219    const LOG2_10M: f64 = f64::from_bits(0xbb89dc1da9800000);
220    const LOG2_10L: f64 = f64::from_bits(0xb984fd20dba1f655);
221
222    let yhh = dd_fmla(-k, LOG2_10H, x); // exact, |yh| <= 2^-13
223
224    let mut ky0 = DoubleDouble::from_exact_add(yhh, -k * LOG2_10M);
225    ky0.lo = dd_fmla(-k, LOG2_10L, ky0.lo);
226
227    /* now x = k + yh, thus 2^x = 2^k * 2^yh, and we multiply yh by log(10)
228    to use the accurate path of exp() */
229
230    let ky = DoubleDouble::quick_mult(ky0, DoubleDouble::new(LN10L, LN10H));
231
232    let ik = unsafe {
233        k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
234    };
235    let im = (ik >> 12).wrapping_add(0x3ff);
236    let i2 = (ik >> 6) & 0x3f;
237    let i1 = ik & 0x3f;
238
239    let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
240    let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
241
242    let p = DoubleDouble::quick_mult(t2, t1);
243
244    let mut q = q_2(ky);
245    q = DoubleDouble::quick_mult(p, q);
246    let mut ud: u64 = (im as u64).wrapping_shl(52);
247
248    if im == 0x7ff {
249        q.hi *= 2.0;
250        q.lo *= 2.0;
251        ud = (im.wrapping_sub(1) as u64).wrapping_shl(52);
252    }
253    q.hi *= f64::from_bits(ud);
254    q.lo *= f64::from_bits(ud);
255    q
256}
257
258#[cold]
259fn exp10m1_accurate_tiny(x: f64) -> f64 {
260    let x2 = x * x;
261    let x4 = x2 * x2;
262    /* The following is a degree-17 polynomial generated by Sollya
263    (file exp10m1_accurate.sollya),
264    which approximates exp10m1(x) with relative error bounded by 2^-107.506
265    for |x| <= 0.0625. */
266
267    const Q: [u64; 25] = [
268        0x40026bb1bbb55516,
269        0xbcaf48ad494ea3e9,
270        0x40053524c73cea69,
271        0xbcae2bfab318d696,
272        0x4000470591de2ca4,
273        0x3ca823527cebf918,
274        0x3ff2bd7609fd98c4,
275        0x3c931ea51f6641df,
276        0x3fe1429ffd1d4d76,
277        0x3c7117195be7f232,
278        0x3fca7ed70847c8b6,
279        0xbc54260c5e23d0c8,
280        0x3fb16e4dfc333a87,
281        0xbc533fd284110905,
282        0x3f94116b05fdaa5d,
283        0xbc20721de44d79a8,
284        0x3f74897c45d93d43,
285        0x3f52ea52b2d182ac,
286        0x3f2facfd5d905b22,
287        0x3f084fe12df8bde3,
288        0x3ee1398ad75d01bf,
289        0x3eb6a9e96fbf6be7,
290        0x3e8bd456a29007c2,
291        0x3e6006cf8378cf9b,
292        0x3e368862b132b6e2,
293    ];
294    let mut c13 = dd_fmla(f64::from_bits(Q[23]), x, f64::from_bits(Q[22])); // degree 15
295    let c11 = dd_fmla(f64::from_bits(Q[21]), x, f64::from_bits(Q[20])); // degree 14
296    c13 = dd_fmla(f64::from_bits(Q[24]), x2, c13); // degree 15
297    // add Q[19]*x+c13*x2+c15*x4 to Q[18] (degree 11)
298    let mut p = DoubleDouble::from_exact_add(
299        f64::from_bits(Q[18]),
300        f_fmla(f64::from_bits(Q[19]), x, f_fmla(c11, x2, c13 * x4)),
301    );
302    // multiply h+l by x and add Q[17] (degree 10)
303    p = DoubleDouble::quick_f64_mult(x, p);
304    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[17]), p.hi);
305    p.lo += p0.lo;
306    p.hi = p0.hi;
307
308    // multiply h+l by x and add Q[16] (degree 9)
309    p = DoubleDouble::quick_f64_mult(x, p);
310    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[16]), p.hi);
311    p.lo += p0.lo;
312    p.hi = p0.hi;
313    // multiply h+l by x and add Q[14]+Q[15] (degree 8)
314    p = DoubleDouble::quick_f64_mult(x, p);
315    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[14]), p.hi);
316    p.lo += p0.lo + f64::from_bits(Q[15]);
317    p.hi = p0.hi;
318    // multiply h+l by x and add Q[12]+Q[13] (degree 7)
319    p = DoubleDouble::quick_f64_mult(x, p);
320    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[12]), p.hi);
321    p.lo += p0.lo + f64::from_bits(Q[13]);
322    p.hi = p0.hi;
323
324    // multiply h+l by x and add Q[10]+Q[11] (degree 6)
325    p = DoubleDouble::quick_f64_mult(x, p);
326    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[10]), p.hi);
327    p.lo += p0.lo + f64::from_bits(Q[11]);
328    p.hi = p0.hi;
329
330    // multiply h+l by x and add Q[8]+Q[9] (degree 5)
331    p = DoubleDouble::quick_f64_mult(x, p);
332    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[8]), p.hi);
333    p.lo += p0.lo + f64::from_bits(Q[9]);
334    p.hi = p0.hi;
335
336    // multiply h+l by x and add Q[6]+Q[7] (degree 4)
337    p = DoubleDouble::quick_f64_mult(x, p);
338    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[6]), p.hi);
339    p.lo += p0.lo + f64::from_bits(Q[7]);
340    p.hi = p0.hi;
341
342    // multiply h+l by x and add Q[4]+Q[5] (degree 3)
343    p = DoubleDouble::quick_f64_mult(x, p);
344    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[4]), p.hi);
345    p.lo += p0.lo + f64::from_bits(Q[5]);
346    p.hi = p0.hi;
347
348    // multiply h+l by x and add Q[2]+Q[3] (degree 2)
349    p = DoubleDouble::quick_f64_mult(x, p);
350    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[2]), p.hi);
351    p.lo += p0.lo + f64::from_bits(Q[3]);
352    p.hi = p0.hi;
353
354    // multiply h+l by x and add Q[0]+Q[1] (degree 2)
355    p = DoubleDouble::quick_f64_mult(x, p);
356    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[0]), p.hi);
357    p.lo += p0.lo + f64::from_bits(Q[1]);
358    p.hi = p0.hi;
359
360    // multiply h+l by x
361    p = DoubleDouble::quick_f64_mult(x, p);
362    p.to_f64()
363}
364
365#[cold]
366fn exp10m1_accurate(x: f64) -> f64 {
367    let t = x.to_bits();
368    let ux = t;
369    let ax = ux & 0x7fffffffffffffffu64;
370
371    if ax <= 0x3fc0000000000000u64 {
372        // |x| <= 0.125
373        return exp10m1_accurate_tiny(x);
374    }
375
376    let mut p = exp_2(x);
377
378    let zf: DoubleDouble = DoubleDouble::from_full_exact_add(p.hi, -1.0);
379    p.lo += zf.lo;
380    p.hi = zf.hi;
381    p.to_f64()
382}
383
384/* |x| <= 0.125, put in h + l a double-double approximation of exp2m1(x),
385and return the maximal corresponding absolute error.
386We also have |x| > 0x1.0527dbd87e24dp-51.
387With xmin=RR("0x1.0527dbd87e24dp-51",16), the routine
388exp2m1_fast_tiny_all(xmin,0.125,2^-65.73) in exp2m1.sage returns
3891.63414352331297e-20 < 2^-65.73, and
390exp2m1_fast_tiny_all(-0.125,-xmin,2^-65.62) returns
3911.76283772822891e-20 < 2^-65.62, which proves the relative
392error is bounded by 2^-65.62. */
393#[inline]
394fn exp10m1_fast_tiny(x: f64) -> Exp10m1 {
395    /* The following is a degree-11 polynomial generated by Sollya
396    (file exp10m1_fast.sollya),
397    which approximates exp10m1(x) with relative error bounded by 2^-69.58
398    for |x| <= 0.0625. */
399    const P: [u64; 14] = [
400        0x40026bb1bbb55516,
401        0xbcaf48abcf79e094,
402        0x40053524c73cea69,
403        0xbcae1badf796d704,
404        0x4000470591de2ca4,
405        0x3ca7db8caacb2cea,
406        0x3ff2bd7609fd98ba,
407        0x3fe1429ffd1d4d98,
408        0x3fca7ed7084998e1,
409        0x3fb16e4dfc30944b,
410        0x3f94116ae4b57526,
411        0x3f74897c6a90f61c,
412        0x3f52ec689c32b3a0,
413        0x3f2faced20d698fe,
414    ];
415    let x2 = x * x;
416    let x4 = x2 * x2;
417    let mut c9 = dd_fmla(f64::from_bits(P[12]), x, f64::from_bits(P[11])); // degree 9
418    let c7 = dd_fmla(f64::from_bits(P[10]), x, f64::from_bits(P[9])); // degree 7
419    let mut c5 = dd_fmla(f64::from_bits(P[8]), x, f64::from_bits(P[7])); // degree 5
420    c9 = dd_fmla(f64::from_bits(P[13]), x2, c9); // degree 9
421    c5 = dd_fmla(c7, x2, c5); // degree 5
422    c5 = dd_fmla(c9, x4, c5); // degree 5
423
424    let mut p = DoubleDouble::from_exact_mult(c5, x);
425    let p0 = DoubleDouble::from_exact_add(f64::from_bits(P[6]), p.hi);
426    p.lo += p0.lo;
427    p.hi = p0.hi;
428
429    p = DoubleDouble::quick_f64_mult(x, p);
430
431    let p1 = DoubleDouble::from_exact_add(f64::from_bits(P[4]), p.hi);
432    p.lo += p1.lo + f64::from_bits(P[5]);
433    p.hi = p1.hi;
434
435    p = DoubleDouble::quick_f64_mult(x, p);
436
437    let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[2]), p.hi);
438    p.lo += p2.lo + f64::from_bits(P[3]);
439    p.hi = p2.hi;
440
441    p = DoubleDouble::quick_f64_mult(x, p);
442
443    let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[0]), p.hi);
444    p.lo += p2.lo + f64::from_bits(P[1]);
445    p.hi = p2.hi;
446
447    p = DoubleDouble::quick_f64_mult(x, p);
448
449    Exp10m1 {
450        exp: p,
451        err: f64::from_bits(0x3bb0a00000000000) * p.hi, // 2^-65.62 < 0x1.4ep-66
452    }
453}
454
455/// Computes 10^x - 1
456///
457/// Max found ULP 0.5
458pub fn f_exp10m1(d: f64) -> f64 {
459    let mut x = d;
460    let t = x.to_bits();
461    let ux = t;
462    let ax = ux & 0x7fffffffffffffffu64;
463
464    if ux >= 0xc03041704c068ef0u64 {
465        // x = -NaN or x <= -0x1.041704c068efp+4
466        if (ux >> 52) == 0xfff {
467            // -NaN or -Inf
468            return if ux > 0xfff0000000000000u64 {
469                x + x
470            } else {
471                -1.0
472            };
473        }
474        // for x <= -0x1.041704c068efp+4, exp10m1(x) rounds to -1 to nearest
475        return -1.0 + f64::from_bits(0x3c90000000000000);
476    } else if ax > 0x40734413509f79feu64 {
477        // x = +NaN or x >= 1024
478        if (ux >> 52) == 0x7ff {
479            // +NaN
480            return x + x;
481        }
482        return f64::from_bits(0x7fefffffffffffff) * x;
483    } else if ax <= 0x3c90000000000000u64
484    // |x| <= 0x1.0527dbd87e24dp-51
485    /* then the second term of the Taylor expansion of 2^x-1 at x=0 is
486    smaller in absolute value than 1/2 ulp(first term):
487    log(2)*x + log(2)^2*x^2/2 + ... */
488    {
489        /* we use special code when log(2)*|x| is very small, in which case
490        the double-double approximation h+l has its lower part l
491        "truncated" */
492        return if ax <= 0x3970000000000000u64
493        // |x| <= 2^-104
494        {
495            // special case for 0
496            if x == 0. {
497                return x;
498            }
499            if x.abs() == f64::from_bits(0x000086c73059343c) {
500                return dd_fmla(
501                    -f64::copysign(f64::from_bits(0x1e60010000000000), x),
502                    f64::from_bits(0x1e50000000000000),
503                    f64::copysign(f64::from_bits(0x000136568740cb56), x),
504                );
505            }
506            if x.abs() == f64::from_bits(0x00013a7b70d0248c) {
507                return dd_fmla(
508                    f64::copysign(f64::from_bits(0x1e5ffe0000000000), x),
509                    f64::from_bits(0x1e50000000000000),
510                    f64::copysign(f64::from_bits(0x0002d41f3b972fc7), x),
511                );
512            }
513
514            // scale x by 2^106
515            x *= f64::from_bits(0x4690000000000000);
516            let mut z = DoubleDouble::from_exact_mult(LN10H, x);
517            z.lo = dd_fmla(LN10L, x, z.lo);
518            let mut h2 = z.to_f64(); // round to 53-bit precision
519            // scale back, hoping to avoid double rounding
520            h2 *= f64::from_bits(0x3950000000000000);
521            // now subtract back h2 * 2^106 from h to get the correction term
522            let mut h = dd_fmla(-h2, f64::from_bits(0x4690000000000000), z.hi);
523            // add l
524            h += z.lo;
525            /* add h2 + h * 2^-106. Warning: when h=0, 2^-106*h2 might be exact,
526            thus no underflow will be raised. We have underflow for
527            0 < x <= 0x1.71547652b82fep-1022 for RNDZ, and for
528            0 < x <= 0x1.71547652b82fdp-1022 for RNDN/RNDU. */
529            dyad_fmla(h, f64::from_bits(0x3950000000000000), h2)
530        } else {
531            const C2: f64 = f64::from_bits(0x40053524c73cea69); // log(2)^2/2
532            let mut z = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN10L, LN10H), x);
533            /* h+l approximates the first term x*log(2) */
534            /* we add C2*x^2 last, so that in case there is a cancellation in
535            LN10L*x+l, it will contribute more bits */
536            z.lo = dd_fmla(C2 * x, x, z.lo);
537            z.to_f64()
538        };
539    }
540
541    /* now -0x1.041704c068efp+4 < x < -2^-54
542    or 2^-54 < x <= 0x1.34413509f79fep+8 */
543
544    /* 10^x-1 is exact for x integer, 1 <= x <= 15 */
545    if ux << 15 == 0 {
546        let i = unsafe { x.cpu_floor().to_int_unchecked::<i32>() };
547        if x == i as f64 && 1 <= i && i <= 15 {
548            static EXP10_1_15: [u64; 16] = [
549                0x0000000000000000,
550                0x4022000000000000,
551                0x4058c00000000000,
552                0x408f380000000000,
553                0x40c3878000000000,
554                0x40f869f000000000,
555                0x412e847e00000000,
556                0x416312cfe0000000,
557                0x4197d783fc000000,
558                0x41cdcd64ff800000,
559                0x4202a05f1ff80000,
560                0x42374876e7ff0000,
561                0x426d1a94a1ffe000,
562                0x42a2309ce53ffe00,
563                0x42d6bcc41e8fffc0,
564                0x430c6bf52633fff8,
565            ];
566            return f64::from_bits(EXP10_1_15[i as usize]);
567        }
568    }
569
570    let result = exp10m1_fast(x, ax <= 0x3fb0000000000000u64);
571    let left = result.exp.hi + (result.exp.lo - result.err);
572    let right = result.exp.hi + (result.exp.lo + result.err);
573    if left != right {
574        return exp10m1_accurate(x);
575    }
576    left
577}
578
579#[cfg(test)]
580mod tests {
581    use super::*;
582
583    #[test]
584    fn test_exp10m1() {
585        assert_eq!(f_exp10m1(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002364140972981833),
586                   0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005443635762124408);
587        assert_eq!(99., f_exp10m1(2.0));
588        assert_eq!(315.22776601683796, f_exp10m1(2.5));
589        assert_eq!(-0.7056827241416722, f_exp10m1(-0.5311842449009418));
590    }
591}
pxfm/exponents/exp10m1.rs

pxfm/exponents/
exp10m1.rs