1use crate::common::{f_fmla, f_fmlaf, fmlaf, pow2if, rintfk};
30use crate::polyeval::f_polyeval5;
31use crate::round::RoundFinite;
32
33const L2U_F: f32 = 0.693_145_751_953_125;
34const L2L_F: f32 = 1.428_606_765_330_187_045_e-6;
35const R_LN2_F: f32 = std::f32::consts::LOG2_E;
36
37#[inline]
40pub const fn expf(d: f32) -> f32 {
41 const EXP_POLY_1_S: f32 = 2f32;
42 const EXP_POLY_2_S: f32 = 0.16666707f32;
43 const EXP_POLY_3_S: f32 = -0.002775669f32;
44 let qf = rintfk(d * R_LN2_F);
45 let q = qf as i32;
46 let r = fmlaf(qf, -L2U_F, d);
47 let r = fmlaf(qf, -L2L_F, r);
48
49 let f = r * r;
50 let mut u = EXP_POLY_3_S;
52 u = fmlaf(u, f, EXP_POLY_2_S);
53 u = fmlaf(u, f, EXP_POLY_1_S);
54 let u = 1f32 + 2f32 * r / (u - r);
55 let i2 = pow2if(q);
56 u * i2
57 }
64
65pub(crate) static EXP_M1: [u64; 207] = [
72 0x368f1e6b68529e33,
73 0x36a525be4e4e601d,
74 0x36bcbe0a45f75eb1,
75 0x36d3884e838aea68,
76 0x36ea8c1f14e2af5d,
77 0x37020a717e64a9bd,
78 0x3718851d84118908,
79 0x3730a9bdfb02d240,
80 0x3746a5bea046b42e,
81 0x375ec7f3b269efa8,
82 0x3774eafb87eab0f2,
83 0x378c6e2d05bbc000,
84 0x37a35208867c2683,
85 0x37ba425b317eeacd,
86 0x37d1d8508fa8246a,
87 0x37e840fbc08fdc8a,
88 0x38007b7112bc1ffe,
89 0x381666d0dad2961d,
90 0x382e726c3f64d0fe,
91 0x3844b0dc07cabf98,
92 0x385c1f2daf3b6a46,
93 0x38731c5957a47de2,
94 0x3889f96445648b9f,
95 0x38a1a6baeadb4fd1,
96 0x38b7fd974d372e45,
97 0x38d04da4d1452919,
98 0x38e62891f06b3450,
99 0x38fe1dd273aa8a4a,
100 0x3914775e0840bfdd,
101 0x392bd109d9d94bda,
102 0x3942e73f53fba844,
103 0x3959b138170d6bfe,
104 0x397175af0cf60ec5,
105 0x3987baee1bffa80b,
106 0x39a02057d1245ceb,
107 0x39b5eafffb34ba31,
108 0x39cdca23bae16424,
109 0x39e43e7fc88b8056,
110 0x39fb83bf23a9a9eb,
111 0x3a12b2b8dd05b318,
112 0x3a2969d47321e4cc,
113 0x3a41452b7723aed2,
114 0x3a5778fe2497184c,
115 0x3a6fe7116182e9cc,
116 0x3a85ae191a99585a,
117 0x3a9d775d87da854d,
118 0x3ab4063f8cc8bb98,
119 0x3acb374b315f87c1,
120 0x3ae27ec458c65e3c,
121 0x3af923372c67a074,
122 0x3b11152eaeb73c08,
123 0x3b2737c5645114b5,
124 0x3b3f8e6c24b5592e,
125 0x3b5571db733a9d61,
126 0x3b6d257d547e083f,
127 0x3b83ce9b9de78f85,
128 0x3b9aebabae3a41b5,
129 0x3bb24b6031b49bda,
130 0x3bc8dd5e1bb09d7e,
131 0x3be0e5b73d1ff53d,
132 0x3bf6f741de1748ec,
133 0x3c0f36bd37f42f3e,
134 0x3c2536452ee2f75c,
135 0x3c3cd480a1b74820,
136 0x3c539792499b1a24,
137 0x3c6aa0de4bf35b38,
138 0x3c82188ad6ae3303,
139 0x3c9898471fca6055,
140 0x3cb0b6c3afdde064,
141 0x3cc6b7719a59f0e0,
142 0x3cdee001eed62aa0,
143 0x3cf4fb547c775da8,
144 0x3d0c8464f7616468,
145 0x3d236121e24d3bba,
146 0x3d3a56e0c2ac7f75,
147 0x3d51e642baeb84a0,
148 0x3d6853f01d6d53ba,
149 0x3d80885298767e9a,
150 0x3d967852a7007e42,
151 0x3dae8a37a45fc32e,
152 0x3dc4c1078fe9228a,
153 0x3ddc3527e433fab1,
154 0x3df32b48bf117da2,
155 0x3e0a0db0d0ddb3ec,
156 0x3e21b48655f37267,
157 0x3e381056ff2c5772,
158 0x3e505a628c699fa1,
159 0x3e6639e3175a689d,
160 0x3e7e355bbaee85cb,
161 0x3e94875ca227ec38,
162 0x3eabe6c6fdb01612,
163 0x3ec2f6053b981d98,
164 0x3ed9c54c3b43bc8b,
165 0x3ef18354238f6764,
166 0x3f07cd79b5647c9b,
167 0x3f202cf22526545a,
168 0x3f35fc21041027ad,
169 0x3f4de16b9c24a98f,
170 0x3f644e51f113d4d6,
171 0x3f7b993fe00d5376,
172 0x3f92c155b8213cf4,
173 0x3fa97db0ccceb0af,
174 0x3fc152aaa3bf81cc,
175 0x3fd78b56362cef38,
176 0x3ff0000000000000,
177 0x4005bf0a8b145769,
178 0x401d8e64b8d4ddae,
179 0x403415e5bf6fb106,
180 0x404b4c902e273a58,
181 0x40628d389970338f,
182 0x407936dc5690c08f,
183 0x409122885aaeddaa,
184 0x40a749ea7d470c6e,
185 0x40bfa7157c470f82,
186 0x40d5829dcf950560,
187 0x40ed3c4488ee4f7f,
188 0x4103de1654d37c9a,
189 0x411b00b5916ac955,
190 0x413259ac48bf05d7,
191 0x4148f0ccafad2a87,
192 0x4160f2ebd0a80020,
193 0x417709348c0ea4f9,
194 0x418f4f22091940bd,
195 0x41a546d8f9ed26e1,
196 0x41bceb088b68e804,
197 0x41d3a6e1fd9eecfd,
198 0x41eab5adb9c43600,
199 0x420226af33b1fdc1,
200 0x4218ab7fb5475fb7,
201 0x4230c3d3920962c9,
202 0x4246c932696a6b5d,
203 0x425ef822f7f6731d,
204 0x42750bba3796379a,
205 0x428c9aae4631c056,
206 0x42a370470aec28ed,
207 0x42ba6b765d8cdf6d,
208 0x42d1f43fcc4b662c,
209 0x42e866f34a725782,
210 0x4300953e2f3a1ef7,
211 0x431689e221bc8d5b,
212 0x432ea215a1d20d76,
213 0x4344d13fbb1a001a,
214 0x435c4b334617cc67,
215 0x43733a43d282a519,
216 0x438a220d397972eb,
217 0x43a1c25c88df6862,
218 0x43b8232558201159,
219 0x43d0672a3c9eb871,
220 0x43e64b41c6d37832,
221 0x43fe4cf766fe49be,
222 0x44149767bc0483e3,
223 0x442bfc951eb8bb76,
224 0x444304d6aeca254b,
225 0x4459d97010884251,
226 0x44719103e4080b45,
227 0x4487e013cd114461,
228 0x44a03996528e074c,
229 0x44b60d4f6fdac731,
230 0x44cdf8c5af17ba3b,
231 0x44e45e3076d61699,
232 0x44fbaed16a6e0da7,
233 0x4512cffdfebde1a1,
234 0x4529919cabefcb69,
235 0x454160345c9953e3,
236 0x45579dbc9dc53c66,
237 0x45700c810d464097,
238 0x4585d009394c5c27,
239 0x459da57de8f107a8,
240 0x45b425982cf597cd,
241 0x45cb61e5ca3a5e31,
242 0x45e29bb825dfcf87,
243 0x45f94a90db0d6fe2,
244 0x46112fec759586fd,
245 0x46275c1dc469e3af,
246 0x463fbfd219c43b04,
247 0x4655936d44e1a146,
248 0x466d531d8a7ee79c,
249 0x4683ed9d24a2d51b,
250 0x469b15cfe5b6e17b,
251 0x46b268038c2c0e00,
252 0x46c9044a73545d48,
253 0x46e1002ab6218b38,
254 0x46f71b3540cbf921,
255 0x470f6799ea9c414a,
256 0x47255779b984f3eb,
257 0x473d01a210c44aa4,
258 0x4753b63da8e91210,
259 0x476aca8d6b0116b8,
260 0x478234de9e0c74e9,
261 0x4798bec7503ca477,
262 0x47b0d0eda9796b90,
263 0x47c6db0118477245,
264 0x47df1056dc7bf22d,
265 0x47f51c2cc3433801,
266 0x480cb108ffbec164,
267 0x48237f780991b584,
268 0x483a801c0ea8ac4d,
269 0x48520247cc4c46c1,
270 0x48687a0553328015,
271 0x4880a233dee4f9bb,
272 0x48969b7f55b808ba,
273 0x48aeba064644060a,
274 0x48c4e184933d9364,
275 0x48dc614fe2531841,
276 0x48f3494a9b171bf5,
277 0x490a36798b9d969b,
278 0x4921d03d8c0c04af,
279];
280
281pub(crate) static EXP_M2: [u64; 128] = [
286 0x3ff0000000000000,
287 0x3ff0202015600446,
288 0x3ff04080ab55de39,
289 0x3ff06122436410dd,
290 0x3ff08205601127ed,
291 0x3ff0a32a84e9c1f6,
292 0x3ff0c49236829e8c,
293 0x3ff0e63cfa7ab09d,
294 0x3ff1082b577d34ed,
295 0x3ff12a5dd543ccc5,
296 0x3ff14cd4fc989cd6,
297 0x3ff16f9157587069,
298 0x3ff192937074e0cd,
299 0x3ff1b5dbd3f68122,
300 0x3ff1d96b0eff0e79,
301 0x3ff1fd41afcba45e,
302 0x3ff2216045b6f5cd,
303 0x3ff245c7613b8a9b,
304 0x3ff26a7793f60164,
305 0x3ff28f7170a755fd,
306 0x3ff2b4b58b372c79,
307 0x3ff2da4478b620c7,
308 0x3ff3001ecf601af7,
309 0x3ff32645269ea829,
310 0x3ff34cb8170b5835,
311 0x3ff373783a722012,
312 0x3ff39a862bd3c106,
313 0x3ff3c1e2876834aa,
314 0x3ff3e98deaa11dcc,
315 0x3ff41188f42c3e32,
316 0x3ff439d443f5f159,
317 0x3ff462707b2bac21,
318 0x3ff48b5e3c3e8186,
319 0x3ff4b49e2ae5ac67,
320 0x3ff4de30ec211e60,
321 0x3ff50817263c13cd,
322 0x3ff5325180cfacf7,
323 0x3ff55ce0a4c58c7c,
324 0x3ff587c53c5a7af0,
325 0x3ff5b2fff3210fd9,
326 0x3ff5de9176045ff5,
327 0x3ff60a7a734ab0e8,
328 0x3ff636bb9a983258,
329 0x3ff663559cf1bc7c,
330 0x3ff690492cbf9433,
331 0x3ff6bd96fdd034a2,
332 0x3ff6eb3fc55b1e76,
333 0x3ff719443a03acb9,
334 0x3ff747a513dbef6a,
335 0x3ff776630c678bc1,
336 0x3ff7a57ede9ea23e,
337 0x3ff7d4f946f0ba8d,
338 0x3ff804d30347b546,
339 0x3ff8350cd30ac390,
340 0x3ff865a7772164c5,
341 0x3ff896a3b1f66a0e,
342 0x3ff8c802477b0010,
343 0x3ff8f9c3fd29beaf,
344 0x3ff92be99a09bf00,
345 0x3ff95e73e6b1b75e,
346 0x3ff99163ad4b1dcc,
347 0x3ff9c4b9b995509b,
348 0x3ff9f876d8e8c566,
349 0x3ffa2c9bda3a3e78,
350 0x3ffa61298e1e069c,
351 0x3ffa9620c6cb3374,
352 0x3ffacb82581eee54,
353 0x3ffb014f179fc3b8,
354 0x3ffb3787dc80f95f,
355 0x3ffb6e2d7fa5eb18,
356 0x3ffba540dba56e56,
357 0x3ffbdcc2cccd3c85,
358 0x3ffc14b431256446,
359 0x3ffc4d15e873c193,
360 0x3ffc85e8d43f7cd0,
361 0x3ffcbf2dd7d490f2,
362 0x3ffcf8e5d84758a9,
363 0x3ffd3311bc7822b4,
364 0x3ffd6db26d16cd67,
365 0x3ffda8c8d4a66969,
366 0x3ffde455df80e3c0,
367 0x3ffe205a7bdab73e,
368 0x3ffe5cd799c6a54e,
369 0x3ffe99ce2b397649,
370 0x3ffed73f240dc142,
371 0x3fff152b7a07bb76,
372 0x3fff539424d90f5e,
373 0x3fff927a1e24bb76,
374 0x3fffd1de6182f8c9,
375 0x400008e0f64294ab,
376 0x40002912df5ce72a,
377 0x400049856cd84339,
378 0x40006a39207f0a09,
379 0x40008b2e7d2035cf,
380 0x4000ac6606916501,
381 0x4000cde041b0e9ae,
382 0x4000ef9db467dcf8,
383 0x4001119ee5ac36b6,
384 0x400133e45d82e952,
385 0x4001566ea50201d7,
386 0x4001793e4652cc50,
387 0x40019c53ccb3fc6b,
388 0x4001bfafc47bda73,
389 0x4001e352bb1a74ad,
390 0x4002073d3f1bd518,
391 0x40022b6fe02a3b9c,
392 0x40024feb2f105cb8,
393 0x400274afbdbba4a6,
394 0x400299be1f3e7f1c,
395 0x4002bf16e7d2a38c,
396 0x4002e4baacdb6614,
397 0x40030aaa04e80d05,
398 0x400330e587b62b28,
399 0x4003576dce33fead,
400 0x40037e437282d4ee,
401 0x4003a5670ff972ed,
402 0x4003ccd9432682b4,
403 0x4003f49aa9d30590,
404 0x40041cabe304cb34,
405 0x4004450d8f00edd4,
406 0x40046dc04f4e5338,
407 0x400496c4c6b832da,
408 0x4004c01b9950a111,
409 0x4004e9c56c731f5d,
410 0x400513c2e6c731d7,
411 0x40053e14b042f9ca,
412 0x400568bb722dd593,
413 0x400593b7d72305bb,
414];
415
416#[inline]
420pub fn f_expf(x: f32) -> f32 {
421 let x_u = x.to_bits();
422 let x_abs = x_u & 0x7fff_ffffu32;
423 if x_abs >= 0x42b2_0000u32 || x_abs <= 0x3280_0000u32 {
424 let exp = ((x_u >> 23) & 0xFF) as i32;
425 if exp <= 101i32 {
427 return 1.0 + x;
428 }
429
430 if x_u >= 0xc2cf_f1b5u32 {
432 if x.is_infinite() {
434 return 0.0;
435 }
436 if x.is_nan() {
438 return x;
439 }
440 return 0.0;
441 }
442 if x.is_sign_positive() && (x_u >= 0x42b2_0000) {
444 return x + f32::INFINITY;
446 }
447 }
448
449 let kf = (x * 128.).round_finite();
465 let xd = f_fmlaf(kf, -0.0078125 , x) as f64;
467 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; x_hi += 104 << 7;
469 let exp_hi = f64::from_bits(EXP_M1[(x_hi >> 7) as usize]);
471 let exp_mid = f64::from_bits(EXP_M2[(x_hi & 0x7f) as usize]);
473 let p = f_polyeval5(
479 xd,
480 1.,
481 f64::from_bits(0x3feffffffffff777),
482 f64::from_bits(0x3fe000000000071c),
483 f64::from_bits(0x3fc555566668e5e7),
484 f64::from_bits(0x3fa55555555ef243),
485 );
486 (p * exp_hi * exp_mid) as f32
487}
488
489#[inline]
490pub(crate) fn core_expf(x: f32) -> f64 {
491 let kf = (x * 128.).round_finite();
493 let xd = f_fmlaf(kf, -0.0078125 , x) as f64;
495 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; x_hi += 104 << 7;
497 let exp_hi = f64::from_bits(EXP_M1[(x_hi >> 7) as usize]);
499 let exp_mid = f64::from_bits(EXP_M2[(x_hi & 0x7f) as usize]);
501 let p = f_polyeval5(
507 xd,
508 1.,
509 f64::from_bits(0x3feffffffffff777),
510 f64::from_bits(0x3fe000000000071c),
511 f64::from_bits(0x3fc555566668e5e7),
512 f64::from_bits(0x3fa55555555ef243),
513 );
514 p * exp_hi * exp_mid
515}
516
517#[inline]
518pub(crate) fn core_expdf(x: f64) -> f64 {
519 let kf = (x * 128.).round_finite();
521 let xd = f_fmla(kf, -0.0078125 , x);
523 let mut x_hi = unsafe { kf.to_int_unchecked::<i32>() }; x_hi += 104 << 7;
525 let exp_hi = f64::from_bits(EXP_M1[(x_hi >> 7) as usize]);
527 let exp_mid = f64::from_bits(EXP_M2[(x_hi & 0x7f) as usize]);
529 let p = f_polyeval5(
535 xd,
536 1.,
537 f64::from_bits(0x3feffffffffff777),
538 f64::from_bits(0x3fe000000000071c),
539 f64::from_bits(0x3fc555566668e5e7),
540 f64::from_bits(0x3fa55555555ef243),
541 );
542 p * exp_hi * exp_mid
543}
544
545#[cfg(test)]
546mod tests {
547 use super::*;
548
549 #[test]
550 fn expf_test() {
551 assert!(
552 (expf(0f32) - 1f32).abs() < 1e-6,
553 "Invalid result {}",
554 expf(0f32)
555 );
556 assert!(
557 (expf(5f32) - 148.4131591025766f32).abs() < 1e-6,
558 "Invalid result {}",
559 expf(5f32)
560 );
561 }
562
563 #[test]
564 fn f_expf_test() {
565 assert_eq!(f_expf(-103.971596), 1e-45);
566 assert!(
567 (f_expf(0f32) - 1f32).abs() < 1e-6,
568 "Invalid result {}",
569 f_expf(0f32)
570 );
571 assert!(
572 (f_expf(5f32) - 148.4131591025766f32).abs() < 1e-6,
573 "Invalid result {}",
574 f_expf(5f32)
575 );
576 assert_eq!(f_expf(f32::INFINITY), f32::INFINITY);
577 assert_eq!(f_expf(f32::NEG_INFINITY), 0.);
578 assert!(f_expf(f32::NAN).is_nan());
579 }
580}