pxfm/exponents/
exp2m1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::exponents::fast_ldexp;
32use crate::rounding::CpuFloor;
33use crate::rounding::CpuRoundTiesEven;
34
35const LN2H: f64 = f64::from_bits(0x3fe62e42fefa39ef);
36const LN2L: f64 = f64::from_bits(0x3c7abc9e3b39803f);
37
38struct Exp2m1 {
39    exp: DoubleDouble,
40    err: f64,
41}
42
43/* For 0 <= i < 64, T1[i] = (h,l) such that h+l is the best double-double
44approximation of 2^(i/64). The approximation error is bounded as follows:
45|h + l - 2^(i/64)| < 2^-107. */
46pub(crate) static EXP_M1_2_TABLE1: [(u64, u64); 64] = [
47    (0x0000000000000000, 0x3ff0000000000000),
48    (0xbc719083535b085d, 0x3ff02c9a3e778061),
49    (0x3c8d73e2a475b465, 0x3ff059b0d3158574),
50    (0x3c6186be4bb284ff, 0x3ff0874518759bc8),
51    (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
52    (0x3c403a1727c57b53, 0x3ff0e3ec32d3d1a2),
53    (0xbc96c51039449b3a, 0x3ff11301d0125b51),
54    (0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
55    (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
56    (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
57    (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
58    (0x3c8dc775814a8495, 0x3ff2063b88628cd6),
59    (0x3c99b07eb6c70573, 0x3ff2387a6e756238),
60    (0x3c82bd339940e9d9, 0x3ff26b4565e27cdd),
61    (0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
62    (0x3c90024754db41d5, 0x3ff2d285a6e4030b),
63    (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
64    (0x3c932721843659a6, 0x3ff33c08b26416ff),
65    (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
66    (0xbc75e436d661f5e3, 0x3ff3a7db34e59ff7),
67    (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
68    (0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
69    (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
70    (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
71    (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
72    (0xbc94b309d25957e3, 0x3ff4f9b2769d2ca7),
73    (0xbc807abe1db13cad, 0x3ff5342b569d4f82),
74    (0x3c99bb2c011d93ad, 0x3ff56f4736b527da),
75    (0x3c96324c054647ad, 0x3ff5ab07dd485429),
76    (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
77    (0xbc9383c17e40b497, 0x3ff6247eb03a5585),
78    (0xbc9bb60987591c34, 0x3ff6623882552225),
79    (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
80    (0xbc6bbe3a683c88ab, 0x3ff6dfb23c651a2f),
81    (0xbc816e4786887a99, 0x3ff71f75e8ec5f74),
82    (0xbc90245957316dd3, 0x3ff75feb564267c9),
83    (0xbc841577ee04992f, 0x3ff7a11473eb0187),
84    (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
85    (0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
86    (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
87    (0x3c96e9f156864b27, 0x3ff8ace5422aa0db),
88    (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
89    (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
90    (0xbc9d185b7c1b85d1, 0x3ff97d829fde4e50),
91    (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
92    (0xbc9359495d1cd533, 0x3ffa0c667b5de565),
93    (0xbc9d2f6edb8d41e1, 0x3ffa5503b23e255d),
94    (0x3c90fac90ef7fd31, 0x3ffa9e6b5579fdbf),
95    (0x3c97a1cd345dcc81, 0x3ffae89f995ad3ad),
96    (0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
97    (0xbc75584f7e54ac3b, 0x3ffb7f76f2fb5e47),
98    (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
99    (0x3c811065895048dd, 0x3ffc199bdd85529c),
100    (0x3c92884dff483cad, 0x3ffc67f12e57d14b),
101    (0x3c7503cbd1e949db, 0x3ffcb720dcef9069),
102    (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
103    (0x3c82ed02d75b3707, 0x3ffd5818dcfba487),
104    (0x3c9c2300696db532, 0x3ffda9e603db3285),
105    (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
106    (0x3c839e8980a9cc8f, 0x3ffe502ee78b3ff6),
107    (0xbc9e9c23179c2893, 0x3ffea4afa2a490da),
108    (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
109    (0x3c99d3e12dd8a18b, 0x3fff50765b6e4540),
110    (0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
111];
112
113/* For 0 <= i < 64, T2[i] = (h,l) such that h+l is the best double-double
114approximation of 2^(i/2^12). The approximation error is bounded as follows:
115|h + l - 2^(i/2^12)| < 2^-107. */
116pub(crate) static EXP_M1_2_TABLE2: [(u64, u64); 64] = [
117    (0x0000000000000000, 0x3ff0000000000000),
118    (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
119    (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
120    (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
121    (0xbc8d7c96f201bb2f, 0x3ff002c605e2e8cf),
122    (0x3c984711d4c35e9f, 0x3ff003779a95f959),
123    (0xbc80484245243777, 0x3ff0042936faa3d8),
124    (0xbc94b237da2025f9, 0x3ff004dadb113da0),
125    (0xbc75e00e62d6b30d, 0x3ff0058c86da1c0a),
126    (0x3c9a1d6cedbb9481, 0x3ff0063e3a559473),
127    (0xbc94acf197a00142, 0x3ff006eff583fc3d),
128    (0xbc6eaf2ea42391a5, 0x3ff007a1b865a8ca),
129    (0x3c7da93f90835f75, 0x3ff0085382faef83),
130    (0xbc86a79084ab093c, 0x3ff00905554425d4),
131    (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
132    (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
133    (0xbc84f6b2a7609f71, 0x3ff00b1afa5abcbf),
134    (0xbc7e1a258ea8f71b, 0x3ff00bcceb7707ec),
135    (0x3c74362ca5bc26f1, 0x3ff00c7ee448ee02),
136    (0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
137    (0xbc6406ac4e81a645, 0x3ff00de2ed0ee0f5),
138    (0x3c9b5a6902767e09, 0x3ff00e94fd0398e0),
139    (0xbc991b2060859321, 0x3ff00f4714af41d3),
140    (0x3c8427068ab22306, 0x3ff00ff93412315c),
141    (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
142    (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
143    (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
144    (0xbc734104ee7edae9, 0x3ff012c1fecd613b),
145    (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
146    (0x3c7a8cd33b8a1bb3, 0x3ff01426927f5278),
147    (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
148    (0x3c857ba2dc7e0c73, 0x3ff0158b4517bb88),
149    (0x3c9b61299ab8cdb7, 0x3ff0163da9fb3335),
150    (0xbc990565902c5f44, 0x3ff016f0169949ed),
151    (0x3c870fc41c5c2d53, 0x3ff017a28af25567),
152    (0x3c94b9a6e145d76c, 0x3ff018550706ab62),
153    (0xbc7008eff5142bf9, 0x3ff019078ad6a19f),
154    (0xbc977669f033c7de, 0x3ff019ba16628de2),
155    (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
156    (0x3c9371231477ece5, 0x3ff01b1f44af9f9e),
157    (0x3c75e7626621eb5b, 0x3ff01bd1e77170b4),
158    (0xbc9bc72b100828a5, 0x3ff01c8491f08f08),
159    (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
160    (0x3c816996709da2e2, 0x3ff01de9fe280ac8),
161    (0xbc8c11f5239bf535, 0x3ff01e9cbfe113ef),
162    (0x3c8e1d4eb5edc6b3, 0x3ff01f4f8958c1c6),
163    (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
164    (0xbc98f06d8a148a32, 0x3ff020b533856324),
165    (0xbc82bf310fc54eb6, 0x3ff02168143b0281),
166    (0xbc9c95a035eb4175, 0x3ff0221afcb09e3e),
167    (0xbc9491793e46834d, 0x3ff022cdece68c4f),
168    (0xbc73e8d0d9c49091, 0x3ff02380e4dd22ad),
169    (0xbc9314aa16278aa3, 0x3ff02433e494b755),
170    (0x3c848daf888e9651, 0x3ff024e6ec0da046),
171    (0x3c856dc8046821f4, 0x3ff02599fb483385),
172    (0x3c945b42356b9d47, 0x3ff0264d1244c719),
173    (0xbc7082ef51b61d7e, 0x3ff027003103b10e),
174    (0x3c72106ed0920a34, 0x3ff027b357854772),
175    (0xbc9fd4cf26ea5d0f, 0x3ff0286685c9e059),
176    (0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
177    (0x3c564cbba902ca27, 0x3ff029ccf99d720a),
178    (0x3c94383ef231d207, 0x3ff02a803f2d170d),
179    (0x3c94a47a505b3a47, 0x3ff02b338c811703),
180    (0x3c9e47120223467f, 0x3ff02be6e199c811),
181];
182
183// Approximation for the fast path of exp(z) for z=zh+zl,
184// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
185// (assuming x^y does not overflow or underflow)
186#[inline]
187fn q_1(dz: DoubleDouble) -> DoubleDouble {
188    const Q_1: [u64; 5] = [
189        0x3ff0000000000000,
190        0x3ff0000000000000,
191        0x3fe0000000000000,
192        0x3fc5555555995d37,
193        0x3fa55555558489dc,
194    ];
195    let z = dz.to_f64();
196    let mut q = f_fmla(f64::from_bits(Q_1[4]), dz.hi, f64::from_bits(Q_1[3]));
197    q = f_fmla(q, z, f64::from_bits(Q_1[2]));
198
199    let mut p0 = DoubleDouble::from_exact_add(f64::from_bits(Q_1[1]), q * z);
200    p0 = DoubleDouble::quick_mult(dz, p0);
201    p0 = DoubleDouble::f64_add(f64::from_bits(Q_1[0]), p0);
202    p0
203}
204
205#[inline]
206fn exp1(x: DoubleDouble) -> DoubleDouble {
207    const INVLOG2: f64 = f64::from_bits(0x40b71547652b82fe); /* |INVLOG2-2^12/log(2)| < 2^-43.4 */
208    let k = (x.hi * INVLOG2).cpu_round_ties_even();
209
210    const LOG2H: f64 = f64::from_bits(0x3f262e42fefa39ef);
211    const LOG2L: f64 = f64::from_bits(0x3bbabc9e3b39803f);
212    const LOG2DD: DoubleDouble = DoubleDouble::new(LOG2L, LOG2H);
213    let zk = DoubleDouble::quick_mult_f64(LOG2DD, k);
214
215    let mut yz = DoubleDouble::from_exact_add(x.hi - zk.hi, x.lo);
216    yz.lo -= zk.lo;
217
218    let ik: i64 = unsafe { k.to_int_unchecked::<i64>() }; /* Note: k is an integer, this is just a conversion. */
219    let im: i64 = (ik >> 12).wrapping_add(0x3ff);
220    let i2: i64 = (ik >> 6) & 0x3f;
221    let i1: i64 = ik & 0x3f;
222
223    let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
224    let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
225
226    let p0 = DoubleDouble::quick_mult(t2, t1);
227
228    let mut q = q_1(yz);
229    q = DoubleDouble::quick_mult(p0, q);
230
231    /* Scale by 2^k. Warning: for x near 1024, we can have k=2^22, thus
232    M = 2047, which encodes Inf */
233    let mut du = (im as u64).wrapping_shl(52);
234    if im == 0x7ff {
235        q.hi *= 2.0;
236        q.lo *= 2.0;
237        du = (im.wrapping_sub(1) as u64).wrapping_shl(52);
238    }
239    q.hi *= f64::from_bits(du);
240    q.lo *= f64::from_bits(du);
241    q
242}
243
244#[inline]
245fn exp2m1_fast(x: f64, tiny: bool) -> Exp2m1 {
246    if tiny {
247        return exp2m1_fast_tiny(x);
248    }
249    /* now -54 < x < -0.125 or 0.125 < x < 1024: we approximate exp(x*log(2))
250    and subtract 1 */
251    let mut v = DoubleDouble::from_exact_mult(LN2H, x);
252    v.lo = f_fmla(x, LN2L, v.lo);
253    /*
254    The a_mul() call is exact, and the error of the fma() is bounded by
255     ulp(l).
256     We have |t| <= ulp(h) <= ulp(LN2H*1024) = 2^-43,
257     |t+x*LN2L| <= 2^-43 * 1024*LN2L < 2^-42.7,
258     thus |l| <= |t| + |x*LN2L| + ulp(t+x*LN2L)
259              <= 2^-42.7 + 2^-95 <= 2^-42.6, and ulp(l) <= 2^-95.
260     Thus:
261     |h + l - x*log(2)| <= |h + l - x*(LN2H+LN2L)| + |x|*|LN2H+LN2L-log(2)|
262                        <= 2^-95 + 1024*2^-110.4 < 2^-94.9 */
263
264    let mut p = exp1(v);
265
266    let zf: DoubleDouble = if x >= 0. {
267        // implies h >= 1 and the fast_two_sum pre-condition holds
268        DoubleDouble::from_exact_add(p.hi, -1.0)
269    } else {
270        DoubleDouble::from_exact_add(-1.0, p.hi)
271    };
272    p.lo += zf.lo;
273    p.hi = zf.hi;
274    /* The error in the above fast_two_sum is bounded by 2^-105*|h|,
275    with the new value of h, thus the total absolute error is bounded
276    by eps1*|h_in|+2^-105*|h|.
277    Relatively to h this yields eps1*|h_in/h| + 2^-105, where the maximum
278    of |h_in/h| is obtained for x near -0.125, with |2^x/(2^x-1)| < 11.05.
279    We get a relative error bound of 2^-74.138*11.05 + 2^-105 < 2^-70.67. */
280    Exp2m1 {
281        exp: p,
282        err: f64::from_bits(0x3b84200000000000) * p.hi, /* 2^-70.67 < 0x1.42p-71 */
283    }
284}
285
286// Approximation for the accurate path of exp(z) for z=zh+zl,
287// with |z| < 0.000130273 < 2^-12.88 and |zl| < 2^-42.6
288// (assuming x^y does not overflow or underflow)
289#[inline]
290fn q_2(dz: DoubleDouble) -> DoubleDouble {
291    /* Let q[0]..q[7] be the coefficients of degree 0..7 of Q_2.
292    The ulp of q[7]*z^7 is at most 2^-155, thus we can compute q[7]*z^7
293    in double precision only.
294    The ulp of q[6]*z^6 is at most 2^-139, thus we can compute q[6]*z^6
295    in double precision only.
296    The ulp of q[5]*z^5 is at most 2^-124, thus we can compute q[5]*z^5
297    in double precision only. */
298
299    /* The following is a degree-7 polynomial generated by Sollya for exp(z)
300    over [-0.000130273,0.000130273] with absolute error < 2^-113.218
301    (see file exp_accurate.sollya). Since we use this code only for
302    |x| > 0.125 in exp2m1(x), the corresponding relative error for exp2m1
303    is about 2^-113.218/|exp2m1(-0.125)| which is about 2^-110. */
304    const Q_2: [u64; 9] = [
305        0x3ff0000000000000,
306        0x3ff0000000000000,
307        0x3fe0000000000000,
308        0x3fc5555555555555,
309        0x3c655555555c4d26,
310        0x3fa5555555555555,
311        0x3f81111111111111,
312        0x3f56c16c3fbb4213,
313        0x3f2a01a023ede0d7,
314    ];
315
316    let z = dz.to_f64();
317    let mut q = dd_fmla(f64::from_bits(Q_2[8]), dz.hi, f64::from_bits(Q_2[7]));
318    q = dd_fmla(q, z, f64::from_bits(Q_2[6]));
319    q = dd_fmla(q, z, f64::from_bits(Q_2[5]));
320
321    // multiply q by z and add Q_2[3] + Q_2[4]
322
323    let mut p = DoubleDouble::from_exact_mult(q, z);
324    let r0 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[3]), p.hi);
325    p.hi = r0.hi;
326    p.lo += r0.lo + f64::from_bits(Q_2[4]);
327
328    // multiply hi+lo by zh+zl and add Q_2[2]
329
330    p = DoubleDouble::quick_mult(p, dz);
331    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[2]), p.hi);
332    p.hi = r1.hi;
333    p.lo += r1.lo;
334
335    // multiply hi+lo by zh+zl and add Q_2[1]
336    p = DoubleDouble::quick_mult(p, dz);
337    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[1]), p.hi);
338    p.hi = r1.hi;
339    p.lo += r1.lo;
340
341    // multiply hi+lo by zh+zl and add Q_2[0]
342    p = DoubleDouble::quick_mult(p, dz);
343    let r1 = DoubleDouble::from_exact_add(f64::from_bits(Q_2[0]), p.hi);
344    p.hi = r1.hi;
345    p.lo += r1.lo;
346    p
347}
348
349// returns a double-double approximation hi+lo of exp(x*log(2)) for |x| < 745
350#[inline]
351fn exp_2(x: f64) -> DoubleDouble {
352    let k = (x * f64::from_bits(0x40b0000000000000)).cpu_round_ties_even();
353    // since |x| <= 745 we have k <= 3051520
354
355    let yhh = f_fmla(-k, f64::from_bits(0x3f30000000000000), x); // exact, |yh| <= 2^-13
356
357    /* now x = k + yh, thus 2^x = 2^k * 2^yh, and we multiply yh by log(2)
358    to use the accurate path of exp() */
359    let ky = DoubleDouble::quick_f64_mult(yhh, DoubleDouble::new(LN2L, LN2H));
360
361    let ik: i64 = unsafe {
362        k.to_int_unchecked::<i64>() // k is already integer, this is just a conversion
363    };
364    let im = (ik >> 12).wrapping_add(0x3ff);
365    let i2 = (ik >> 6) & 0x3f;
366    let i1 = ik & 0x3f;
367
368    let t1 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE1[i2 as usize]);
369    let t2 = DoubleDouble::from_bit_pair(EXP_M1_2_TABLE2[i1 as usize]);
370
371    let p = DoubleDouble::quick_mult(t2, t1);
372
373    let mut q = q_2(ky);
374    q = DoubleDouble::quick_mult(p, q);
375    let mut ud: u64 = (im as u64).wrapping_shl(52);
376
377    if im == 0x7ff {
378        q.hi *= 2.0;
379        q.lo *= 2.0;
380        ud = (im.wrapping_sub(1) as u64).wrapping_shl(52);
381    }
382    q.hi *= f64::from_bits(ud);
383    q.lo *= f64::from_bits(ud);
384    q
385}
386
387#[cold]
388pub(crate) fn exp2m1_accurate_tiny(x: f64) -> f64 {
389    let x2 = x * x;
390    let x4 = x2 * x2;
391    const Q: [u64; 22] = [
392        0x3fe62e42fefa39ef,
393        0x3c7abc9e3b398040,
394        0x3fcebfbdff82c58f,
395        0xbc65e43a53e44dcf,
396        0x3fac6b08d704a0c0,
397        0xbc4d331627517168,
398        0x3f83b2ab6fba4e77,
399        0x3c14e65df0779f8c,
400        0x3f55d87fe78a6731,
401        0x3bd0717fbf4bd050,
402        0x3f2430912f86c787,
403        0x3bcbd2bdec9bcd42,
404        0x3eeffcbfc588b0c7,
405        0xbb8e60aa6d5e4aa9,
406        0x3eb62c0223a5c824,
407        0x3e7b5253d395e7d4,
408        0x3e3e4cf5158b9160,
409        0x3dfe8cac734c6058,
410        0x3dbc3bd64f17199d,
411        0x3d78161a17e05651,
412        0x3d33150b3d792231,
413        0x3cec184260bfad7e,
414    ];
415    let mut c13 = dd_fmla(f64::from_bits(Q[20]), x, f64::from_bits(Q[19])); // degree 13
416    let c11 = dd_fmla(f64::from_bits(Q[18]), x, f64::from_bits(Q[17])); // degree 11
417    c13 = dd_fmla(f64::from_bits(Q[21]), x2, c13); // degree 13
418    // add Q[16]*x+c11*x2+c13*x4 to Q[15] (degree 9)
419    let mut p = DoubleDouble::from_exact_add(
420        f64::from_bits(Q[15]),
421        f_fmla(f64::from_bits(Q[16]), x, f_fmla(c11, x2, c13 * x4)),
422    );
423    // multiply h+l by x and add Q[14] (degree 8)
424    p = DoubleDouble::quick_f64_mult(x, p);
425    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[14]), p.hi);
426    p.lo += p0.lo;
427    p.hi = p0.hi;
428
429    // multiply h+l by x and add Q[12]+Q[13] (degree 7)
430    p = DoubleDouble::quick_f64_mult(x, p);
431    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[12]), p.hi);
432    p.lo += p0.lo + f64::from_bits(Q[13]);
433    p.hi = p0.hi;
434    // multiply h+l by x and add Q[10]+Q[11] (degree 6)
435    p = DoubleDouble::quick_f64_mult(x, p);
436    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[10]), p.hi);
437    p.lo += p0.lo + f64::from_bits(Q[11]);
438    p.hi = p0.hi;
439    // multiply h+l by x and add Q[8]+Q[9] (degree 5)
440    p = DoubleDouble::quick_f64_mult(x, p);
441    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[8]), p.hi);
442    p.lo += p0.lo + f64::from_bits(Q[9]);
443    p.hi = p0.hi;
444    // multiply h+l by x and add Q[6]+Q[7] (degree 4)
445    p = DoubleDouble::quick_f64_mult(x, p);
446    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[6]), p.hi);
447    p.lo += p0.lo + f64::from_bits(Q[7]);
448    p.hi = p0.hi;
449    // multiply h+l by x and add Q[4]+Q[5] (degree 3)
450    p = DoubleDouble::quick_f64_mult(x, p);
451    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[4]), p.hi);
452    p.lo += p0.lo + f64::from_bits(Q[5]);
453    p.hi = p0.hi;
454    // multiply h+l by x and add Q[2]+Q[3] (degree 2)
455    p = DoubleDouble::quick_f64_mult(x, p);
456    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[2]), p.hi);
457    p.lo += p0.lo + f64::from_bits(Q[3]);
458    p.hi = p0.hi;
459    // multiply h+l by x and add Q[0]+Q[1] (degree 2)
460    p = DoubleDouble::quick_f64_mult(x, p);
461    let p0 = DoubleDouble::from_exact_add(f64::from_bits(Q[0]), p.hi);
462    p.lo += p0.lo + f64::from_bits(Q[1]);
463    p.hi = p0.hi;
464    // multiply h+l by x
465    p = DoubleDouble::quick_f64_mult(x, p);
466    p.to_f64()
467}
468
469#[cold]
470fn exp2m1_accurate(x: f64) -> f64 {
471    let t = x.to_bits();
472    let ux = t;
473    let ax = ux & 0x7fffffffffffffffu64;
474
475    if ax <= 0x3fc0000000000000u64 {
476        // |x| <= 0.125
477        return exp2m1_accurate_tiny(x);
478    }
479
480    let mut p = exp_2(x);
481
482    let zf: DoubleDouble = DoubleDouble::from_full_exact_add(p.hi, -1.0);
483    p.lo += zf.lo;
484    p.hi = zf.hi;
485    p.to_f64()
486}
487
488/* |x| <= 0.125, put in h + l a double-double approximation of exp2m1(x),
489and return the maximal corresponding absolute error.
490We also have |x| > 0x1.0527dbd87e24dp-51.
491With xmin=RR("0x1.0527dbd87e24dp-51",16), the routine
492exp2m1_fast_tiny_all(xmin,0.125,2^-65.73) in exp2m1.sage returns
4931.63414352331297e-20 < 2^-65.73, and
494exp2m1_fast_tiny_all(-0.125,-xmin,2^-65.62) returns
4951.76283772822891e-20 < 2^-65.62, which proves the relative
496error is bounded by 2^-65.62. */
497#[inline]
498fn exp2m1_fast_tiny(x: f64) -> Exp2m1 {
499    /* The maximal value of |c4*x^4/exp2m1(x)| over [-0.125,0.125]
500    is less than 2^-15.109, where c4 is the degree-4 coefficient,
501    thus we can compute the coefficients of degree 4 or higher
502    using double precision only. */
503    const P: [u64; 12] = [
504        0x3fe62e42fefa39ef,
505        0x3c7abd1697afcaf8,
506        0x3fcebfbdff82c58f,
507        0xbc65e5a1d09e1599,
508        0x3fac6b08d704a0bf,
509        0x3f83b2ab6fba4e78,
510        0x3f55d87fe78a84e6,
511        0x3f2430912f86a480,
512        0x3eeffcbfbc1f2b36,
513        0x3eb62c0226c7f6d1,
514        0x3e7b539529819e63,
515        0x3e3e4d552bed5b9c,
516    ];
517    let x2 = x * x;
518    let x4 = x2 * x2;
519    let mut c8 = dd_fmla(f64::from_bits(P[10]), x, f64::from_bits(P[9])); // degree 8
520    let c6 = dd_fmla(f64::from_bits(P[8]), x, f64::from_bits(P[7])); // degree 6
521    let mut c4 = dd_fmla(f64::from_bits(P[6]), x, f64::from_bits(P[5])); // degree 4
522    c8 = dd_fmla(f64::from_bits(P[11]), x2, c8); // degree 8
523    c4 = dd_fmla(c6, x2, c4); // degree 4
524    c4 = dd_fmla(c8, x4, c4); // degree 4
525
526    let mut p = DoubleDouble::from_exact_mult(c4, x);
527    let p0 = DoubleDouble::from_exact_add(f64::from_bits(P[4]), p.hi);
528    p.lo += p0.lo;
529    p.hi = p0.hi;
530
531    p = DoubleDouble::quick_f64_mult(x, p);
532
533    let p1 = DoubleDouble::from_exact_add(f64::from_bits(P[2]), p.hi);
534    p.lo += p1.lo + f64::from_bits(P[3]);
535    p.hi = p1.hi;
536
537    p = DoubleDouble::quick_f64_mult(x, p);
538    let p2 = DoubleDouble::from_exact_add(f64::from_bits(P[0]), p.hi);
539    p.lo += p2.lo + f64::from_bits(P[1]);
540    p.hi = p2.hi;
541
542    p = DoubleDouble::quick_f64_mult(x, p);
543
544    Exp2m1 {
545        exp: p,
546        err: f64::from_bits(0x3bd4e00000000000) * p.hi, // 2^-65.62 < 0x1.4ep-66
547    }
548}
549
550/// Computes 2^x - 1
551///
552/// Max found ULP 0.5
553pub fn f_exp2m1(d: f64) -> f64 {
554    let mut x = d;
555    let t = x.to_bits();
556    let ux = t;
557    let ax = ux & 0x7fffffffffffffffu64;
558
559    if ux >= 0xc04b000000000000u64 {
560        // x = -NaN or x <= -54
561        if (ux >> 52) == 0xfff {
562            // -NaN or -Inf
563            return if ux > 0xfff0000000000000u64 {
564                x + x
565            } else {
566                -1.0
567            };
568        }
569        // for x <= -54, exp2m1(x) rounds to -1 to nearest
570        return -1.0 + f64::from_bits(0x3c90000000000000);
571    } else if ax >= 0x4090000000000000u64 {
572        // x = +NaN or x >= 1024
573        if (ux >> 52) == 0x7ff {
574            // +NaN
575            return x + x;
576        }
577        /* for x >= 1024, exp2m1(x) rounds to +Inf to nearest,
578        but for RNDZ/RNDD, we should have no overflow for x=1024 */
579        return f_fmla(
580            x,
581            f64::from_bits(0x7bffffffffffffff),
582            f64::from_bits(0x7fefffffffffffff),
583        );
584    } else if ax <= 0x3cc0527dbd87e24du64
585    // |x| <= 0x1.0527dbd87e24dp-51
586    /* then the second term of the Taylor expansion of 2^x-1 at x=0 is
587    smaller in absolute value than 1/2 ulp(first term):
588    log(2)*x + log(2)^2*x^2/2 + ... */
589    {
590        /* we use special code when log(2)*|x| is very small, in which case
591        the double-double approximation h+l has its lower part l
592        "truncated" */
593        return if ax <= 0x3970000000000000u64
594        // |x| <= 2^-104
595        {
596            // special case for 0
597            if x == 0. {
598                return x;
599            }
600            // scale x by 2^106
601            x *= f64::from_bits(0x4690000000000000);
602            let z = DoubleDouble::quick_mult_f64(DoubleDouble::new(LN2L, LN2H), x);
603            let mut h2 = z.to_f64(); // round to 53-bit precision
604            // scale back, hoping to avoid double rounding
605            h2 *= f64::from_bits(0x3950000000000000);
606            // now subtract back h2 * 2^106 from h to get the correction term
607            let mut h = dd_fmla(-h2, f64::from_bits(0x4690000000000000), z.hi);
608            // add l
609            h += z.lo;
610            /* add h2 + h * 2^-106. Warning: when h=0, 2^-106*h2 might be exact,
611            thus no underflow will be raised. We have underflow for
612            0 < x <= 0x1.71547652b82fep-1022 for RNDZ, and for
613            0 < x <= 0x1.71547652b82fdp-1022 for RNDN/RNDU. */
614            dyad_fmla(h, f64::from_bits(0x3950000000000000), h2)
615        } else {
616            const C2: f64 = f64::from_bits(0x3fcebfbdff82c58f); // log(2)^2/2
617            let mut z = DoubleDouble::from_exact_mult(LN2H, x);
618            z.lo = dyad_fmla(LN2L, x, z.lo);
619            /* h+l approximates the first term x*log(2) */
620            /* we add C2*x^2 last, so that in case there is a cancellation in
621            LN2L*x+l, it will contribute more bits */
622            z.lo += C2 * x * x;
623            z.to_f64()
624        };
625    }
626
627    /* now -54 < x < -0x1.0527dbd87e24dp-51
628    or 0x1.0527dbd87e24dp-51 < x < 1024 */
629
630    /* 2^x-1 is exact for x integer, -53 <= x <= 53 */
631    if ux.wrapping_shl(17) == 0 {
632        let i = unsafe { x.cpu_floor().to_int_unchecked::<i32>() };
633        if x == i as f64 && -53 <= i && i <= 53 {
634            return if i >= 0 {
635                ((1u64 << i) - 1) as f64
636            } else {
637                -1.0 + fast_ldexp(1.0, i)
638            };
639        }
640    }
641
642    let result = exp2m1_fast(x, ax <= 0x3fc0000000000000u64);
643    let left = result.exp.hi + (result.exp.lo - result.err);
644    let right = result.exp.hi + (result.exp.lo + result.err);
645    if left != right {
646        return exp2m1_accurate(x);
647    }
648    left
649}
650
651#[cfg(test)]
652mod tests {
653    use super::*;
654
655    #[test]
656    fn test_exp2m1() {
657        assert_eq!(f_exp2m1(5.4172231599824623E-312), 3.75493295981e-312);
658        assert_eq!(f_exp2m1( 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000017800593653177087), 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012338431302992956);
659        assert_eq!(3., f_exp2m1(2.0));
660        assert_eq!(4.656854249492381, f_exp2m1(2.5));
661        assert_eq!(-0.30801352040368324, f_exp2m1(-0.5311842449009418));
662    }
663}
pxfm/exponents/exp2m1.rs

pxfm/exponents/
exp2m1.rs