pxfm/exponents/
expm1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::exponents::fast_ldexp;
32use crate::round_ties_even::RoundTiesEven;
33use crate::shared_eval::poly_dekker_generic;
34use std::hint::black_box;
35
36static TZ: [(u64, u64); 65] = [
37    (0xbc6797d4686c5393, 0xbfcc5041854df7d4),
38    (0xbc8ea1cb9d163339, 0xbfcb881a23aebb48),
39    (0x3c8f483a3e8cd60f, 0xbfcabe60e1f21838),
40    (0x3c7dffd920f493db, 0xbfc9f3129931fab0),
41    (0xbc851bfdbb129094, 0xbfc9262c1c3430a0),
42    (0x3c8cd3e5225e2206, 0xbfc857aa375db4e4),
43    (0x3c5e3a6bdaece8f9, 0xbfc78789b0a5e0c0),
44    (0xbc8daf2ae0c2d3d4, 0xbfc6b5c7478983d8),
45    (0xbc7fd36226fadd44, 0xbfc5e25fb4fde210),
46    (0x3c7d887cd0341ab0, 0xbfc50d4fab639758),
47    (0xbc8676a52a1a618b, 0xbfc43693d679612c),
48    (0x3c79776b420ad283, 0xbfc35e28db4ecd9c),
49    (0x3c73d5fd7d70a5ed, 0xbfc2840b5836cf68),
50    (0x3c5a94ad2c8fa0bf, 0xbfc1a837e4ba3760),
51    (0x3c26ad4c353465b0, 0xbfc0caab118a1278),
52    (0xbc78bba170e59b65, 0xbfbfd6c2d0e3d910),
53    (0xbc8e1e0a76cb0685, 0xbfbe14aed893eef0),
54    (0x3c8fe131f55e75f8, 0xbfbc4f1331d22d40),
55    (0xbc8b5beee8bcee31, 0xbfba85e8c62d9c10),
56    (0xbc77fe9b02c25e9b, 0xbfb8b92870fa2b58),
57    (0xbc832ae7bdaf1116, 0xbfb6e8caff341fe8),
58    (0x3c7a6cfe58cbd73b, 0xbfb514c92f634788),
59    (0x3c68798de3138a56, 0xbfb33d1bb17df2e8),
60    (0xbc3589321a7ef10b, 0xbfb161bb26cbb590),
61    (0xbc78d0e700fcfb65, 0xbfaf0540438fd5c0),
62    (0x3c8473ef07d5dd3b, 0xbfab3f864c080000),
63    (0xbc838e62149c16e2, 0xbfa7723950130400),
64    (0xbc508bb6309bd394, 0xbfa39d4a1a77e050),
65    (0xbc8bad3fd501a227, 0xbf9f8152aee94500),
66    (0x3c63d27ac39ed253, 0xbf97b88f290230e0),
67    (0xbc8b60bbd08aac55, 0xbf8fc055004416c0),
68    (0xbc4a00d03b3359de, 0xbf7fe0154aaeed80),
69    (0x0000000000000000, 0x0000000000000000),
70    (0x3c8861931c15e39b, 0x3f80100ab00222c0),
71    (0x3c77ab864b3e9045, 0x3f90202ad5778e40),
72    (0x3c74e5659d75e95b, 0x3f984890d9043740),
73    (0x3c78e0bd083aba81, 0x3fa040ac0224fd90),
74    (0x3c345cc1cf959b1b, 0x3fa465509d383eb0),
75    (0xbc8eb6980ce14da7, 0x3fa89246d053d180),
76    (0x3c77324137d6c342, 0x3facc79f4f5613a0),
77    (0xbc45272ff30eed1b, 0x3fb082b577d34ed8),
78    (0xbc81280f19dace1c, 0x3fb2a5dd543ccc50),
79    (0xbc8d550af31c8ec3, 0x3fb4cd4fc989cd68),
80    (0x3c87923b72aa582d, 0x3fb6f91575870690),
81    (0xbc776c2e732457f1, 0x3fb92937074e0cd8),
82    (0x3c881f5c92a5200f, 0x3fbb5dbd3f681220),
83    (0x3c8e8ac7a4d3206c, 0x3fbd96b0eff0e790),
84    (0xbc712db6f4bbe33b, 0x3fbfd41afcba45e8),
85    (0xbc58c4a5df1ec7e5, 0x3fc10b022db7ae68),
86    (0xbc6bd4b1c37ea8a2, 0x3fc22e3b09dc54d8),
87    (0x3c85aeb9860044d0, 0x3fc353bc9fb00b20),
88    (0xbc64c26602c63fda, 0x3fc47b8b853aafec),
89    (0xbc87f644c1f9d314, 0x3fc5a5ac59b963cc),
90    (0x3c8f5aa8ec61fc2d, 0x3fc6d223c5b10638),
91    (0x3c27ab912c69ffeb, 0x3fc800f67b00d7b8),
92    (0xbc5b3564bc0ec9cd, 0x3fc9322934f54148),
93    (0x3c86a7062465be33, 0x3fca65c0b85ac1a8),
94    (0xbc885718d2ff1bf4, 0x3fcb9bc1d3910094),
95    (0xbc8045cb0c685e08, 0x3fccd4315e9e0834),
96    (0xbc16e7fb859d5055, 0x3fce0f143b41a554),
97    (0x3c851bbdee020603, 0x3fcf4c6f5508ee5c),
98    (0x3c6e17611afc42c5, 0x3fd04623d0b0f8c8),
99    (0xbc71c5b2e8735a43, 0x3fd0e7510fd7c564),
100    (0xbc825fe139c4cffd, 0x3fd189c1ecaeb084),
101    (0xbc789843c4964554, 0x3fd22d78f0fa061a),
102];
103
104pub(crate) static EXPM1_T0: [(u64, u64); 64] = [
105    (0x0000000000000000, 0x3ff0000000000000),
106    (0xbc719083535b085e, 0x3ff02c9a3e778061),
107    (0x3c8d73e2a475b466, 0x3ff059b0d3158574),
108    (0x3c6186be4bb28500, 0x3ff0874518759bc8),
109    (0x3c98a62e4adc610a, 0x3ff0b5586cf9890f),
110    (0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2),
111    (0xbc96c51039449b3a, 0x3ff11301d0125b51),
112    (0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
113    (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
114    (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
115    (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
116    (0x3c8dc775814a8494, 0x3ff2063b88628cd6),
117    (0x3c99b07eb6c70572, 0x3ff2387a6e756238),
118    (0x3c82bd339940e9da, 0x3ff26b4565e27cdd),
119    (0x3c8612e8afad1256, 0x3ff29e9df51fdee1),
120    (0x3c90024754db41d4, 0x3ff2d285a6e4030b),
121    (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
122    (0x3c932721843659a6, 0x3ff33c08b26416ff),
123    (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
124    (0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7),
125    (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
126    (0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
127    (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
128    (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
129    (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
130    (0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7),
131    (0xbc807abe1db13cac, 0x3ff5342b569d4f82),
132    (0x3c99bb2c011d93ac, 0x3ff56f4736b527da),
133    (0x3c96324c054647ac, 0x3ff5ab07dd485429),
134    (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
135    (0xbc9383c17e40b496, 0x3ff6247eb03a5585),
136    (0xbc9bb60987591c34, 0x3ff6623882552225),
137    (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
138    (0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f),
139    (0xbc816e4786887a9a, 0x3ff71f75e8ec5f74),
140    (0xbc90245957316dd4, 0x3ff75feb564267c9),
141    (0xbc841577ee049930, 0x3ff7a11473eb0187),
142    (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
143    (0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
144    (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
145    (0x3c96e9f156864b26, 0x3ff8ace5422aa0db),
146    (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
147    (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
148    (0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50),
149    (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
150    (0xbc9359495d1cd532, 0x3ffa0c667b5de565),
151    (0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d),
152    (0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf),
153    (0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad),
154    (0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
155    (0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47),
156    (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
157    (0x3c811065895048de, 0x3ffc199bdd85529c),
158    (0x3c92884dff483cac, 0x3ffc67f12e57d14b),
159    (0x3c7503cbd1e949dc, 0x3ffcb720dcef9069),
160    (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
161    (0x3c82ed02d75b3706, 0x3ffd5818dcfba487),
162    (0x3c9c2300696db532, 0x3ffda9e603db3285),
163    (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
164    (0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6),
165    (0xbc9e9c23179c2894, 0x3ffea4afa2a490da),
166    (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
167    (0x3c99d3e12dd8a18a, 0x3fff50765b6e4540),
168    (0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
169];
170
171pub(crate) static EXPM1_T1: [(u64, u64); 64] = [
172    (0x0000000000000000, 0x3ff0000000000000),
173    (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
174    (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
175    (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
176    (0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf),
177    (0x3c984711d4c35ea0, 0x3ff003779a95f959),
178    (0xbc80484245243778, 0x3ff0042936faa3d8),
179    (0xbc94b237da2025fa, 0x3ff004dadb113da0),
180    (0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a),
181    (0x3c9a1d6cedbb9480, 0x3ff0063e3a559473),
182    (0xbc94acf197a00142, 0x3ff006eff583fc3d),
183    (0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca),
184    (0x3c7da93f90835f76, 0x3ff0085382faef83),
185    (0xbc86a79084ab093c, 0x3ff00905554425d4),
186    (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
187    (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
188    (0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf),
189    (0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec),
190    (0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02),
191    (0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
192    (0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5),
193    (0x3c9b5a6902767e08, 0x3ff00e94fd0398e0),
194    (0xbc991b2060859320, 0x3ff00f4714af41d3),
195    (0x3c8427068ab22306, 0x3ff00ff93412315c),
196    (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
197    (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
198    (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
199    (0xbc734104ee7edae8, 0x3ff012c1fecd613b),
200    (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
201    (0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278),
202    (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
203    (0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88),
204    (0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335),
205    (0xbc990565902c5f44, 0x3ff016f0169949ed),
206    (0x3c870fc41c5c2d54, 0x3ff017a28af25567),
207    (0x3c94b9a6e145d76c, 0x3ff018550706ab62),
208    (0xbc7008eff5142bfa, 0x3ff019078ad6a19f),
209    (0xbc977669f033c7de, 0x3ff019ba16628de2),
210    (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
211    (0x3c9371231477ece6, 0x3ff01b1f44af9f9e),
212    (0x3c75e7626621eb5a, 0x3ff01bd1e77170b4),
213    (0xbc9bc72b100828a4, 0x3ff01c8491f08f08),
214    (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
215    (0x3c816996709da2e2, 0x3ff01de9fe280ac8),
216    (0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef),
217    (0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6),
218    (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
219    (0xbc98f06d8a148a32, 0x3ff020b533856324),
220    (0xbc82bf310fc54eb6, 0x3ff02168143b0281),
221    (0xbc9c95a035eb4176, 0x3ff0221afcb09e3e),
222    (0xbc9491793e46834c, 0x3ff022cdece68c4f),
223    (0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad),
224    (0xbc9314aa16278aa4, 0x3ff02433e494b755),
225    (0x3c848daf888e9650, 0x3ff024e6ec0da046),
226    (0x3c856dc8046821f4, 0x3ff02599fb483385),
227    (0x3c945b42356b9d46, 0x3ff0264d1244c719),
228    (0xbc7082ef51b61d7e, 0x3ff027003103b10e),
229    (0x3c72106ed0920a34, 0x3ff027b357854772),
230    (0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059),
231    (0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
232    (0x3c564cbba902ca28, 0x3ff029ccf99d720a),
233    (0x3c94383ef231d206, 0x3ff02a803f2d170d),
234    (0x3c94a47a505b3a46, 0x3ff02b338c811703),
235    (0x3c9e471202234680, 0x3ff02be6e199c811),
236];
237
238static EXPM1_DD1: [(u64, u64); 11] = [
239    (0x3c65555555555554, 0x3fc5555555555555),
240    (0x3c45555555555123, 0x3fa5555555555555),
241    (0x3c01111111118167, 0x3f81111111111111),
242    (0xbbef49f49e220cea, 0x3f56c16c16c16c17),
243    (0x3b6a019eff6f919c, 0x3f2a01a01a01a01a),
244    (0x3b39fcff48a75b41, 0x3efa01a01a01a01a),
245    (0xbb6c14f73758cd7f, 0x3ec71de3a556c734),
246    (0x3b3dfce97931018f, 0x3e927e4fb7789f5c),
247    (0x3afc513da9e4c9c5, 0x3e5ae64567f544e3),
248    (0x3acca00af84f2b60, 0x3e21eed8eff8d831),
249    (0x3a8f27ac6000898f, 0x3de6124613a86e8f),
250];
251
252static EXPM1_DD2: [(u64, u64); 7] = [
253    (0x3ff0000000000000, 0x0000000000000000),
254    (0x3fe0000000000000, 0x39c712f72ecec2cf),
255    (0x3fc5555555555555, 0x3c65555555554d07),
256    (0x3fa5555555555555, 0x3c455194d28275da),
257    (0x3f81111111111111, 0x3c012faa0e1c0f7b),
258    (0x3f56c16c16da6973, 0xbbf4ba45ab25d2a3),
259    (0x3f2a01a019eb7f31, 0xbbc9091d845ecd36),
260];
261
262#[inline]
263pub(crate) fn opoly_dd_generic<const N: usize>(
264    x: DoubleDouble,
265    poly: [(u64, u64); N],
266) -> DoubleDouble {
267    let zch = poly.last().unwrap();
268    let p0 = DoubleDouble::from_exact_add(f64::from_bits(zch.0), x.lo);
269    let ach = p0.hi;
270    let acl = f64::from_bits(zch.1) + p0.lo;
271    let mut ch = DoubleDouble::new(acl, ach);
272
273    for zch in poly.iter().rev().skip(1) {
274        ch = DoubleDouble::quick_mult_f64(ch, x.hi);
275        let z0 = DoubleDouble::from_bit_pair(*zch);
276        ch = DoubleDouble::add(z0, ch);
277    }
278
279    ch
280}
281
282#[cold]
283fn as_expm1_accurate(x: f64) -> f64 {
284    let mut ix;
285    if x.abs() < 0.25 {
286        const CL: [u64; 6] = [
287            0x3da93974a8ca5354,
288            0x3d6ae7f3e71e4908,
289            0x3d2ae7f357341648,
290            0x3ce952c7f96664cb,
291            0x3ca686f8ce633aae,
292            0x3c62f49b2fbfb5b6,
293        ];
294
295        let fl0 = f_fmla(x, f64::from_bits(CL[5]), f64::from_bits(CL[4]));
296        let fl1 = f_fmla(x, fl0, f64::from_bits(CL[3]));
297        let fl2 = f_fmla(x, fl1, f64::from_bits(CL[2]));
298        let fl3 = f_fmla(x, fl2, f64::from_bits(CL[1]));
299
300        let fl = x * f_fmla(x, fl3, f64::from_bits(CL[0]));
301        let mut f = opoly_dd_generic(DoubleDouble::new(fl, x), EXPM1_DD1);
302        f = DoubleDouble::quick_mult_f64(f, x);
303        f = DoubleDouble::quick_mult_f64(f, x);
304        f = DoubleDouble::quick_mult_f64(f, x);
305        let hx = 0.5 * x;
306        let dx2dd = DoubleDouble::from_exact_mult(x, hx);
307        f = DoubleDouble::add(dx2dd, f);
308        let v0 = DoubleDouble::from_exact_add(x, f.hi);
309        let v1 = DoubleDouble::from_exact_add(v0.lo, f.lo);
310        let v2 = DoubleDouble::from_exact_add(v0.hi, v1.hi);
311        let mut v3 = DoubleDouble::from_exact_add(v2.lo, v1.lo);
312        ix = v3.hi.to_bits();
313        if (ix & 0x000fffffffffffff) == 0 {
314            let v = v3.lo.to_bits();
315            let d: i64 = ((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64)
316                .wrapping_shl(1)
317                .wrapping_add(1) as i64;
318            ix = ix.wrapping_add(d as u64);
319            v3.hi = f64::from_bits(ix);
320        }
321        v3.hi + v2.hi
322    } else {
323        const S: f64 = f64::from_bits(0x40b71547652b82fe);
324        let t = (x * S).round_ties_even_finite();
325        let jt: i64 = t as i64;
326        let i0 = (jt >> 6) & 0x3f;
327        let i1 = jt & 0x3f;
328        let ie = jt >> 12;
329        let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
330        let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
331
332        let bt = DoubleDouble::quick_mult(t0, t1);
333
334        const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
335        const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
336        const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);
337
338        let dx = f_fmla(-L2H, t, x);
339        let dxl = L2L * t;
340        let dxll = f_fmla(L2LL, t, dd_fmla(L2L, t, -dxl));
341        let dxh = dx + dxl;
342        let dxl = (dx - dxh) + dxl + dxll;
343        let mut f = poly_dekker_generic(DoubleDouble::new(dxl, dxh), EXPM1_DD2);
344        f = DoubleDouble::quick_mult(DoubleDouble::new(dxl, dxh), f);
345        f = DoubleDouble::quick_mult(f, bt);
346        f = DoubleDouble::add(bt, f);
347        let off: u64 = (2048i64 + 1023i64).wrapping_sub(ie).wrapping_shl(52) as u64;
348        let e: f64;
349        if ie < 53 {
350            let fhz = DoubleDouble::from_exact_add(f64::from_bits(off), f.hi);
351            f.hi = fhz.hi;
352            e = fhz.lo;
353        } else if ie < 104 {
354            let fhz = DoubleDouble::from_exact_add(f.hi, f64::from_bits(off));
355            f.hi = fhz.hi;
356            e = fhz.lo;
357        } else {
358            e = 0.;
359        }
360        f.lo += e;
361        let dst = DoubleDouble::from_exact_add(f.hi, f.lo);
362        fast_ldexp(dst.hi, ie as i32)
363    }
364}
365
366/// Computes e^x - 1
367///
368/// Max found ULP 0.5
369pub fn f_expm1(x: f64) -> f64 {
370    let ix = x.to_bits();
371    let aix: u64 = ix & 0x7fff_ffff_ffff_ffff;
372    if aix < 0x3fd0000000000000u64 {
373        if aix < 0x3ca0000000000000u64 {
374            if aix == 0 {
375                return x;
376            }
377            return dyad_fmla(f64::from_bits(0x3c90000000000000), x.abs(), x);
378        }
379        let sx = f64::from_bits(0x4060000000000000) * x;
380        let fx = sx.round_ties_even_finite();
381        let z = sx - fx;
382        let z2 = z * z;
383        let i: i64 = unsafe {
384            fx.to_int_unchecked::<i64>() // fx is already integer, this is just a conversion
385        };
386        let t = DoubleDouble::from_bit_pair(TZ[i.wrapping_add(32) as usize]);
387        const C: [u64; 6] = [
388            0x3f80000000000000,
389            0x3f00000000000000,
390            0x3e755555555551ad,
391            0x3de555555555599c,
392            0x3d511111ad1ad69d,
393            0x3cb6c16c168b1fb5,
394        ];
395        let fh = z * f64::from_bits(C[0]);
396
397        let fl0 = f_fmla(z, f64::from_bits(C[5]), f64::from_bits(C[4]));
398        let fl1 = f_fmla(z, f64::from_bits(C[2]), f64::from_bits(C[1]));
399
400        let fw0 = f_fmla(z, fl0, f64::from_bits(C[3]));
401
402        let fl = z2 * f_fmla(z2, fw0, fl1);
403        let mut f = DoubleDouble::new(fl, fh);
404        let e0 = f64::from_bits(0x3bea000000000000);
405        let eps = z2 * e0 + f64::from_bits(0x3970000000000000);
406        let mut r = DoubleDouble::from_exact_add(t.hi, f.hi);
407        r.lo += t.lo + f.lo;
408        f = DoubleDouble::quick_mult(t, f);
409        f = DoubleDouble::add(r, f);
410        let ub = f.hi + (f.lo + eps);
411        let lb = f.hi + (f.lo - eps);
412        if ub != lb {
413            return as_expm1_accurate(x);
414        }
415        lb
416    } else {
417        if aix >= 0x40862e42fefa39f0u64 {
418            if aix > 0x7ff0000000000000u64 {
419                return x + x;
420            } // nan
421            if aix == 0x7ff0000000000000u64 {
422                return if (ix >> 63) != 0 { -1.0 } else { x };
423            }
424            if (ix >> 63) == 0 {
425                const Z: f64 = f64::from_bits(0x7fe0000000000000);
426                return black_box(Z) * black_box(Z);
427            }
428        }
429        if ix >= 0xc0425e4f7b2737fau64 {
430            if ix >= 0xc042b708872320e2u64 {
431                return black_box(-1.0) + black_box(f64::from_bits(0x3c80000000000000));
432            }
433            return (f64::from_bits(0x40425e4f7b2737fa) + x + f64::from_bits(0x3cc8486612173c69))
434                * f64::from_bits(0x3c971547652b82fe)
435                - f64::from_bits(0x3fefffffffffffff);
436        }
437
438        const S: f64 = f64::from_bits(0x40b71547652b82fe);
439        let t = (x * S).round_ties_even_finite();
440        let jt: i64 = unsafe {
441            t.to_int_unchecked::<i64>() // t is already integer, this is just a conversion
442        };
443        let i0 = (jt >> 6) & 0x3f;
444        let i1 = jt & 0x3f;
445        let ie = jt >> 12;
446        let t0 = DoubleDouble::from_bit_pair(EXPM1_T0[i0 as usize]);
447        let t1 = DoubleDouble::from_bit_pair(EXPM1_T1[i1 as usize]);
448
449        let bt = DoubleDouble::quick_mult(t0, t1);
450
451        const L2H: f64 = f64::from_bits(0x3f262e42ff000000);
452        const L2L: f64 = f64::from_bits(0x3d0718432a1b0e26);
453        let dx = dd_fmla(L2L, t, f_fmla(-L2H, t, x));
454        let dx2 = dx * dx;
455
456        const CH: [u64; 4] = [
457            0x3ff0000000000000,
458            0x3fe0000000000000,
459            0x3fc55555557e54ff,
460            0x3fa55555553a12f4,
461        ];
462
463        let p0 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
464        let p1 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
465
466        let p = f_fmla(dx2, p0, p1);
467        let mut fh = bt.hi;
468        let tx = bt.hi * dx;
469        let mut fl = f_fmla(tx, p, bt.lo);
470        let eps = f64::from_bits(0x3c0833beace2b6fe) * bt.hi;
471
472        let off: u64 = ((2048i64 + 1023i64).wrapping_sub(ie) as u64).wrapping_shl(52);
473        let e: f64;
474        if ie < 53 {
475            let flz = DoubleDouble::from_exact_add(f64::from_bits(off), fh);
476            e = flz.lo;
477            fh = flz.hi;
478        } else if ie < 75 {
479            let flz = DoubleDouble::from_exact_add(fh, f64::from_bits(off));
480            e = flz.lo;
481            fh = flz.hi;
482        } else {
483            e = 0.;
484        }
485        fl += e;
486        let ub = fh + (fl + eps);
487        let lb = fh + (fl - eps);
488        if ub != lb {
489            return as_expm1_accurate(x);
490        }
491        fast_ldexp(lb, ie as i32)
492    }
493}
494
495#[cfg(test)]
496mod tests {
497    use super::*;
498
499    #[test]
500    fn test_expm1() {
501        assert_eq!(f_expm1(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864),
502                   0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000028321017343872864 );
503        assert_eq!(f_expm1(36.52188110363568), 7265264535836525.);
504        assert_eq!(f_expm1(2.), 6.38905609893065);
505        assert_eq!(f_expm1(0.4321321), 0.5405386068701713);
506        assert_eq!(f_expm1(-0.0000004321321), -4.321320066309375e-7);
507    }
508}