1use crate::common::{f_fmla, fmla, pow2i, rintk};
30use crate::double_double::DoubleDouble;
31use crate::exponents::auxiliary::fast_ldexp;
32use crate::round::RoundFinite;
33
34#[inline]
37pub const fn exp(d: f64) -> f64 {
38 const EXP_POLY_1_D: f64 = 2f64;
39 const EXP_POLY_2_D: f64 = 0.16666666666666674f64;
40 const EXP_POLY_3_D: f64 = -0.0027777777777777614f64;
41 const EXP_POLY_4_D: f64 = 6.613756613755705e-5f64;
42 const EXP_POLY_5_D: f64 = -1.6534391534392554e-6f64;
43 const EXP_POLY_6_D: f64 = 4.17535139757361979584e-8f64;
44
45 const L2_U: f64 = 0.693_147_180_559_662_956_511_601_805_686_950_683_593_75;
46 const L2_L: f64 = 0.282_352_905_630_315_771_225_884_481_750_134_360_255_254_120_68_e-12;
47 const R_LN2: f64 =
48 1.442_695_040_888_963_407_359_924_681_001_892_137_426_645_954_152_985_934_135_449_406_931;
49
50 let qf = rintk(d * R_LN2);
51 let q = qf as i32;
52
53 let mut r = fmla(qf, -L2_U, d);
54 r = fmla(qf, -L2_L, r);
55
56 let f = r * r;
57 let mut u = EXP_POLY_6_D;
59 u = fmla(u, f, EXP_POLY_5_D);
60 u = fmla(u, f, EXP_POLY_4_D);
61 u = fmla(u, f, EXP_POLY_3_D);
62 u = fmla(u, f, EXP_POLY_2_D);
63 u = fmla(u, f, EXP_POLY_1_D);
64 let u = 1f64 + 2f64 * r / (u - r);
65 let i2 = pow2i(q);
66 u * i2
67 }
74
75pub(crate) static EXP_REDUCE_T0: [(u64, u64); 64] = [
76 (0x0000000000000000, 0x3ff0000000000000),
77 (0xbc719083535b085e, 0x3ff02c9a3e778061),
78 (0x3c8d73e2a475b466, 0x3ff059b0d3158574),
79 (0x3c6186be4bb28500, 0x3ff0874518759bc8),
80 (0x3c98a62e4adc610a, 0x3ff0b5586cf9890f),
81 (0x3c403a1727c57b52, 0x3ff0e3ec32d3d1a2),
82 (0xbc96c51039449b3a, 0x3ff11301d0125b51),
83 (0xbc932fbf9af1369e, 0x3ff1429aaea92de0),
84 (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
85 (0x3c8e5b4c7b4968e4, 0x3ff1a35beb6fcb75),
86 (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
87 (0x3c8dc775814a8494, 0x3ff2063b88628cd6),
88 (0x3c99b07eb6c70572, 0x3ff2387a6e756238),
89 (0x3c82bd339940e9da, 0x3ff26b4565e27cdd),
90 (0x3c8612e8afad1256, 0x3ff29e9df51fdee1),
91 (0x3c90024754db41d4, 0x3ff2d285a6e4030b),
92 (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
93 (0x3c932721843659a6, 0x3ff33c08b26416ff),
94 (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
95 (0xbc75e436d661f5e2, 0x3ff3a7db34e59ff7),
96 (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
97 (0xbc5ef3691c309278, 0x3ff4160a21f72e2a),
98 (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
99 (0x3c73c1a3b69062f0, 0x3ff486a2b5c13cd0),
100 (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
101 (0xbc94b309d25957e4, 0x3ff4f9b2769d2ca7),
102 (0xbc807abe1db13cac, 0x3ff5342b569d4f82),
103 (0x3c99bb2c011d93ac, 0x3ff56f4736b527da),
104 (0x3c96324c054647ac, 0x3ff5ab07dd485429),
105 (0x3c9ba6f93080e65e, 0x3ff5e76f15ad2148),
106 (0xbc9383c17e40b496, 0x3ff6247eb03a5585),
107 (0xbc9bb60987591c34, 0x3ff6623882552225),
108 (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
109 (0xbc6bbe3a683c88aa, 0x3ff6dfb23c651a2f),
110 (0xbc816e4786887a9a, 0x3ff71f75e8ec5f74),
111 (0xbc90245957316dd4, 0x3ff75feb564267c9),
112 (0xbc841577ee049930, 0x3ff7a11473eb0187),
113 (0x3c705d02ba15797e, 0x3ff7e2f336cf4e62),
114 (0xbc9d4c1dd41532d8, 0x3ff82589994cce13),
115 (0xbc9fc6f89bd4f6ba, 0x3ff868d99b4492ed),
116 (0x3c96e9f156864b26, 0x3ff8ace5422aa0db),
117 (0x3c85cc13a2e3976c, 0x3ff8f1ae99157736),
118 (0xbc675fc781b57ebc, 0x3ff93737b0cdc5e5),
119 (0xbc9d185b7c1b85d0, 0x3ff97d829fde4e50),
120 (0x3c7c7c46b071f2be, 0x3ff9c49182a3f090),
121 (0xbc9359495d1cd532, 0x3ffa0c667b5de565),
122 (0xbc9d2f6edb8d41e2, 0x3ffa5503b23e255d),
123 (0x3c90fac90ef7fd32, 0x3ffa9e6b5579fdbf),
124 (0x3c97a1cd345dcc82, 0x3ffae89f995ad3ad),
125 (0xbc62805e3084d708, 0x3ffb33a2b84f15fb),
126 (0xbc75584f7e54ac3a, 0x3ffb7f76f2fb5e47),
127 (0x3c823dd07a2d9e84, 0x3ffbcc1e904bc1d2),
128 (0x3c811065895048de, 0x3ffc199bdd85529c),
129 (0x3c92884dff483cac, 0x3ffc67f12e57d14b),
130 (0x3c7503cbd1e949dc, 0x3ffcb720dcef9069),
131 (0xbc9cbc3743797a9c, 0x3ffd072d4a07897c),
132 (0x3c82ed02d75b3706, 0x3ffd5818dcfba487),
133 (0x3c9c2300696db532, 0x3ffda9e603db3285),
134 (0xbc91a5cd4f184b5c, 0x3ffdfc97337b9b5f),
135 (0x3c839e8980a9cc90, 0x3ffe502ee78b3ff6),
136 (0xbc9e9c23179c2894, 0x3ffea4afa2a490da),
137 (0x3c9dc7f486a4b6b0, 0x3ffefa1bee615a27),
138 (0x3c99d3e12dd8a18a, 0x3fff50765b6e4540),
139 (0x3c874853f3a5931e, 0x3fffa7c1819e90d8),
140];
141
142pub(crate) static EXP_REDUCE_T1: [(u64, u64); 64] = [
143 (0x0000000000000000, 0x3ff0000000000000),
144 (0x3c9ae8e38c59c72a, 0x3ff000b175effdc7),
145 (0xbc57b5d0d58ea8f4, 0x3ff00162f3904052),
146 (0x3c94115cb6b16a8e, 0x3ff0021478e11ce6),
147 (0xbc8d7c96f201bb2e, 0x3ff002c605e2e8cf),
148 (0x3c984711d4c35ea0, 0x3ff003779a95f959),
149 (0xbc80484245243778, 0x3ff0042936faa3d8),
150 (0xbc94b237da2025fa, 0x3ff004dadb113da0),
151 (0xbc75e00e62d6b30e, 0x3ff0058c86da1c0a),
152 (0x3c9a1d6cedbb9480, 0x3ff0063e3a559473),
153 (0xbc94acf197a00142, 0x3ff006eff583fc3d),
154 (0xbc6eaf2ea42391a6, 0x3ff007a1b865a8ca),
155 (0x3c7da93f90835f76, 0x3ff0085382faef83),
156 (0xbc86a79084ab093c, 0x3ff00905554425d4),
157 (0x3c986364f8fbe8f8, 0x3ff009b72f41a12b),
158 (0xbc882e8e14e3110e, 0x3ff00a6910f3b6fd),
159 (0xbc84f6b2a7609f72, 0x3ff00b1afa5abcbf),
160 (0xbc7e1a258ea8f71a, 0x3ff00bcceb7707ec),
161 (0x3c74362ca5bc26f2, 0x3ff00c7ee448ee02),
162 (0x3c9095a56c919d02, 0x3ff00d30e4d0c483),
163 (0xbc6406ac4e81a646, 0x3ff00de2ed0ee0f5),
164 (0x3c9b5a6902767e08, 0x3ff00e94fd0398e0),
165 (0xbc991b2060859320, 0x3ff00f4714af41d3),
166 (0x3c8427068ab22306, 0x3ff00ff93412315c),
167 (0x3c9c1d0660524e08, 0x3ff010ab5b2cbd11),
168 (0xbc9e7bdfb3204be8, 0x3ff0115d89ff3a8b),
169 (0x3c8843aa8b9cbbc6, 0x3ff0120fc089ff63),
170 (0xbc734104ee7edae8, 0x3ff012c1fecd613b),
171 (0xbc72b6aeb6176892, 0x3ff0137444c9b5b5),
172 (0x3c7a8cd33b8a1bb2, 0x3ff01426927f5278),
173 (0x3c72edc08e5da99a, 0x3ff014d8e7ee8d2f),
174 (0x3c857ba2dc7e0c72, 0x3ff0158b4517bb88),
175 (0x3c9b61299ab8cdb8, 0x3ff0163da9fb3335),
176 (0xbc990565902c5f44, 0x3ff016f0169949ed),
177 (0x3c870fc41c5c2d54, 0x3ff017a28af25567),
178 (0x3c94b9a6e145d76c, 0x3ff018550706ab62),
179 (0xbc7008eff5142bfa, 0x3ff019078ad6a19f),
180 (0xbc977669f033c7de, 0x3ff019ba16628de2),
181 (0xbc909bb78eeead0a, 0x3ff01a6ca9aac5f3),
182 (0x3c9371231477ece6, 0x3ff01b1f44af9f9e),
183 (0x3c75e7626621eb5a, 0x3ff01bd1e77170b4),
184 (0xbc9bc72b100828a4, 0x3ff01c8491f08f08),
185 (0xbc6ce39cbbab8bbe, 0x3ff01d37442d5070),
186 (0x3c816996709da2e2, 0x3ff01de9fe280ac8),
187 (0xbc8c11f5239bf536, 0x3ff01e9cbfe113ef),
188 (0x3c8e1d4eb5edc6b4, 0x3ff01f4f8958c1c6),
189 (0xbc9afb99946ee3f0, 0x3ff020025a8f6a35),
190 (0xbc98f06d8a148a32, 0x3ff020b533856324),
191 (0xbc82bf310fc54eb6, 0x3ff02168143b0281),
192 (0xbc9c95a035eb4176, 0x3ff0221afcb09e3e),
193 (0xbc9491793e46834c, 0x3ff022cdece68c4f),
194 (0xbc73e8d0d9c49090, 0x3ff02380e4dd22ad),
195 (0xbc9314aa16278aa4, 0x3ff02433e494b755),
196 (0x3c848daf888e9650, 0x3ff024e6ec0da046),
197 (0x3c856dc8046821f4, 0x3ff02599fb483385),
198 (0x3c945b42356b9d46, 0x3ff0264d1244c719),
199 (0xbc7082ef51b61d7e, 0x3ff027003103b10e),
200 (0x3c72106ed0920a34, 0x3ff027b357854772),
201 (0xbc9fd4cf26ea5d0e, 0x3ff0286685c9e059),
202 (0xbc909f8775e78084, 0x3ff02919bbd1d1d8),
203 (0x3c564cbba902ca28, 0x3ff029ccf99d720a),
204 (0x3c94383ef231d206, 0x3ff02a803f2d170d),
205 (0x3c94a47a505b3a46, 0x3ff02b338c811703),
206 (0x3c9e471202234680, 0x3ff02be6e199c811),
207];
208
209#[inline]
211pub(crate) fn to_denormal(x: f64) -> f64 {
212 let mut ix = x.to_bits();
213 ix &= 0x000fffffffffffff;
214 f64::from_bits(ix)
215}
216
217#[inline]
218fn exp_poly_dd(z: DoubleDouble) -> DoubleDouble {
219 const C: [(u64, u64); 7] = [
220 (0x0000000000000000, 0x3ff0000000000000),
221 (0x39c712f72ecec2cf, 0x3fe0000000000000),
222 (0x3c65555555554d07, 0x3fc5555555555555),
223 (0x3c455194d28275da, 0x3fa5555555555555),
224 (0x3c012faa0e1c0f7b, 0x3f81111111111111),
225 (0xbbf4ba45ab25d2a3, 0x3f56c16c16da6973),
226 (0xbbc9091d845ecd36, 0x3f2a01a019eb7f31),
227 ];
228 let mut r = DoubleDouble::quick_mul_add(
229 DoubleDouble::from_bit_pair(C[6]),
230 z,
231 DoubleDouble::from_bit_pair(C[5]),
232 );
233 r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[4]));
234 r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[3]));
235 r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[2]));
236 r = DoubleDouble::quick_mul_add(r, z, DoubleDouble::from_bit_pair(C[1]));
237 DoubleDouble::quick_mul_add_f64(r, z, f64::from_bits(0x3ff0000000000000))
238}
239
240#[cold]
241fn as_exp_accurate(x: f64, t: f64, tz: DoubleDouble, ie: i64) -> f64 {
242 let mut ix = x.to_bits();
243 if ((ix >> 52) & 0x7ff) < 0x3c9 {
244 return 1. + x;
245 }
246
247 const L2: DoubleDouble = DoubleDouble::new(
250 f64::from_bits(0x3d0718432a1b0e26),
251 f64::from_bits(0x3f262e42ff000000),
252 );
253 const L2LL: f64 = f64::from_bits(0x3999ff0342542fc3);
254 let dx = f_fmla(-L2.hi, t, x);
255 let dx_dd = DoubleDouble::quick_mult_f64(DoubleDouble::new(L2LL, L2.lo), t);
256 let dz = DoubleDouble::full_add_f64(dx_dd, dx);
257 let mut f = exp_poly_dd(dz);
258 f = DoubleDouble::quick_mult(dz, f);
259 if ix > 0xc086232bdd7abcd2u64 {
260 ix = 1i64.wrapping_sub(ie).wrapping_shl(52) as u64;
262 f = DoubleDouble::quick_mult(f, tz);
263 f = DoubleDouble::add(tz, f);
264
265 let new_f = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
266 f.lo += new_f.lo;
267 f.hi = to_denormal(f.hi + f.lo);
268 } else {
269 if tz.hi == 1.0 {
270 let fhe = DoubleDouble::from_exact_add(tz.hi, f.hi);
271 let fhl = DoubleDouble::from_exact_add(fhe.lo, f.lo);
272 f.hi = fhe.hi;
273 f.lo = fhl.hi;
274 ix = f.lo.to_bits();
275 if (ix & 0x000fffffffffffff) == 0 {
276 let v = fhl.lo.to_bits();
277 let d: i64 = (((((ix as i64) >> 63) ^ ((v as i64) >> 63)) as u64).wrapping_shl(1)
278 as i64)
279 .wrapping_add(1);
280 ix = ix.wrapping_add(d as u64);
281 f.lo = f64::from_bits(ix);
282 }
283 } else {
284 f = DoubleDouble::quick_mult(f, tz);
285 f = DoubleDouble::add(tz, f);
286 }
287 f = DoubleDouble::from_exact_add(f.hi, f.lo);
288 f.hi = fast_ldexp(f.hi, ie as i32);
289 }
290 f.hi
291}
292
293pub fn f_exp(x: f64) -> f64 {
297 let mut ix = x.to_bits();
298 let aix = ix & 0x7fffffffffffffff;
299 if aix <= 0x3c90000000000000u64 {
301 return 1.0 + x;
303 }
304 if aix >= 0x40862e42fefa39f0u64 {
305 if aix > 0x7ff0000000000000u64 {
307 return x + x;
308 } if aix == 0x7ff0000000000000u64 {
310 return if (ix >> 63) != 0 {
312 0.0 } else {
314 x };
316 }
317 if (ix >> 63) == 0 {
318 let z = std::hint::black_box(f64::from_bits(0x7fe0000000000000));
320 return z * z;
321 }
322 if aix >= 0x40874910d52d3052u64 {
323 return f64::from_bits(0x18000000000000) * f64::from_bits(0x3c80000000000000);
325 }
326 }
327 const S: f64 = f64::from_bits(0x40b71547652b82fe);
328 let t = (x * S).round_finite();
329 let jt: i64 = unsafe {
330 t.to_int_unchecked::<i64>() };
332 let i0: i64 = (jt >> 6) & 0x3f;
333 let i1 = jt & 0x3f;
334 let ie: i64 = jt >> 12;
335 let t0 = DoubleDouble::from_bit_pair(EXP_REDUCE_T0[i0 as usize]);
336 let t1 = DoubleDouble::from_bit_pair(EXP_REDUCE_T1[i1 as usize]);
337 let tz = DoubleDouble::quick_mult(t0, t1);
338
339 const L2: DoubleDouble = DoubleDouble::new(
340 f64::from_bits(0x3d0718432a1b0e26),
341 f64::from_bits(0x3f262e42ff000000),
342 );
343
344 let dx = f_fmla(L2.lo, t, f_fmla(-L2.hi, t, x));
347 let dx2 = dx * dx;
348 const CH: [u64; 4] = [
349 0x3ff0000000000000,
350 0x3fe0000000000000,
351 0x3fc55555557e54ff,
352 0x3fa55555553a12f4,
353 ];
354
355 let pw0 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
356 let pw1 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
357
358 let p = f_fmla(dx2, pw0, pw1);
359 let mut f = DoubleDouble::new(f_fmla(tz.hi * dx, p, tz.lo), tz.hi);
360 const EPS: f64 = f64::from_bits(0x3c0833beace2b6fe);
361 if ix > 0xc086232bdd7abcd2u64 {
362 ix = 1u64.wrapping_sub(ie as u64).wrapping_shl(52);
364 let sums = DoubleDouble::from_exact_add(f64::from_bits(ix), f.hi);
365 f.hi = sums.hi;
366 f.lo += sums.lo;
367 let ub = f.hi + (f.lo + EPS);
368 let lb = f.hi + (f.lo - EPS);
369 if ub != lb {
370 return as_exp_accurate(x, t, tz, ie);
371 }
372 f.hi = to_denormal(lb);
373 } else {
374 let ub = f.hi + (f.lo + EPS);
375 let lb = f.hi + (f.lo - EPS);
376 if ub != lb {
377 return as_exp_accurate(x, t, tz, ie);
378 }
379 f.hi = fast_ldexp(lb, ie as i32);
380 }
381 f.hi
382}
383
384#[cfg(test)]
385mod tests {
386 use super::*;
387
388 #[test]
389 fn exp_test() {
390 assert!(
391 (exp(0f64) - 1f64).abs() < 1e-8,
392 "Invalid result {}",
393 exp(0f64)
394 );
395 assert!(
396 (exp(5f64) - 148.4131591025766034211155800405522796f64).abs() < 1e-8,
397 "Invalid result {}",
398 exp(5f64)
399 );
400 }
401
402 #[test]
403 fn f_exp_test() {
404 assert_eq!(f_exp(0.000000014901161193847656), 1.0000000149011614);
405 assert_eq!(f_exp(0.), 1.);
406 assert_eq!(f_exp(5f64), 148.4131591025766034211155800405522796f64);
407 assert_eq!(f_exp(f64::INFINITY), f64::INFINITY);
408 assert_eq!(f_exp(f64::NEG_INFINITY), 0.);
409 assert!(f_exp(f64::NAN).is_nan());
410 }
411}