1use crate::common::{f_fmla, f_fmlaf, pow2if};
30use crate::polyeval::f_polyeval6;
31use crate::round::RoundFinite;
32use std::hint::black_box;
33
34const TBLSIZE: usize = 64;
35
36#[repr(align(64))]
37struct Exp2Table([(u32, u32); TBLSIZE]);
38
39#[rustfmt::skip]
40static EXP2FT: Exp2Table = Exp2Table([(0x3F3504F3, 0xB2D4175E),(0x3F36FD92, 0x3268D5EF),(0x3F38FBAF, 0xB30E8719),(0x3F3AFF5B, 0x3319E7DA),(0x3F3D08A4, 0x333CD82F),(0x3F3F179A, 0x330E1902),(0x3F412C4D, 0x32CCF4D7),(0x3F4346CD, 0x328F330E),(0x3F45672A, 0xB201B5B7),(0x3F478D75, 0x32CCCE34),(0x3F49B9BE, 0x335E937C),(0x3F4BEC15, 0x2FF41909),(0x3F4E248C, 0xB21760EA),(0x3F506334, 0x3283628B),(0x3F52A81E, 0x3340F500),(0x3F54F35B, 0x331202BD),(0x3F5744FD, 0x32B66A3E),(0x3F599D16, 0x32D0D9B1),(0x3F5BFBB8, 0x332ED93F),(0x3F5E60F5, 0x3350A709),(0x3F60CCDF, 0x32025744),(0x3F633F89, 0xB33A7C4D),(0x3F65B907, 0x321DA4E9),(0x3F68396A, 0xB2FF36A7),(0x3F6AC0C7, 0x3217E40E),(0x3F6D4F30, 0xB2400CBB),(0x3F6FE4BA, 0x331A2ACC),(0x3F728177, 0xB2B7D3E5),(0x3F75257D, 0xB1FED2BE),(0x3F77D0DF, 0xB32B73BA),(0x3F7A83B3, 0x32579081),(0x3F7D3E0C, 0xB19726B5),(0x3F800000, 0x00000000),(0x3F8164D2, 0x320C09FB),(0x3F82CD87, 0x3391E031),(0x3F843A29, 0x33287EEF),(0x3F85AAC3, 0xB38F6665),(0x3F871F62, 0x339004AB),(0x3F88980F, 0x33AC4561),(0x3F8A14D5, 0xB39CDAEA),(0x3F8B95C2, 0x32949D5C),(0x3F8D1ADF, 0xB36F79FA),(0x3F8EA43A, 0x33971DC2),(0x3F9031DC, 0xB32BD022),(0x3F91C3D3, 0xB3928952),(0x3F935A2B, 0xB2EBFECF),(0x3F94F4F0, 0x3357B8BB),(0x3F96942D, 0xB307353B),(0x3F9837F0, 0xB345DFE9),(0x3F99E046, 0x3382A804),(0x3F9B8D3A, 0x3326993E),(0x3F9D3EDA, 0x3350A029),(0x3F9EF532, 0xB3605F62),(0x3FA0B051, 0xB210909B),(0x3FA27043, 0xB0DDC369),(0x3FA43516, 0x33385844),(0x3FA5FED7, 0x33400757),(0x3FA7CD94, 0x3325446E),(0x3FA9A15B, 0x33237A50),(0x3FAB7A3A, 0x33201CA4),(0x3FAD583F, 0x32394687),(0x3FAF3B79, 0x332E1225),(0x3FB123F6, 0x33838969),(0x3FB311C4, 0xB219F2BA)]);
41
42pub(crate) static EXP2F_TABLE: [u64; 64] = [
53 0x3ff0000000000000,
54 0x3ff02c9a3e778061,
55 0x3ff059b0d3158574,
56 0x3ff0874518759bc8,
57 0x3ff0b5586cf9890f,
58 0x3ff0e3ec32d3d1a2,
59 0x3ff11301d0125b51,
60 0x3ff1429aaea92de0,
61 0x3ff172b83c7d517b,
62 0x3ff1a35beb6fcb75,
63 0x3ff1d4873168b9aa,
64 0x3ff2063b88628cd6,
65 0x3ff2387a6e756238,
66 0x3ff26b4565e27cdd,
67 0x3ff29e9df51fdee1,
68 0x3ff2d285a6e4030b,
69 0x3ff306fe0a31b715,
70 0x3ff33c08b26416ff,
71 0x3ff371a7373aa9cb,
72 0x3ff3a7db34e59ff7,
73 0x3ff3dea64c123422,
74 0x3ff4160a21f72e2a,
75 0x3ff44e086061892d,
76 0x3ff486a2b5c13cd0,
77 0x3ff4bfdad5362a27,
78 0x3ff4f9b2769d2ca7,
79 0x3ff5342b569d4f82,
80 0x3ff56f4736b527da,
81 0x3ff5ab07dd485429,
82 0x3ff5e76f15ad2148,
83 0x3ff6247eb03a5585,
84 0x3ff6623882552225,
85 0x3ff6a09e667f3bcd,
86 0x3ff6dfb23c651a2f,
87 0x3ff71f75e8ec5f74,
88 0x3ff75feb564267c9,
89 0x3ff7a11473eb0187,
90 0x3ff7e2f336cf4e62,
91 0x3ff82589994cce13,
92 0x3ff868d99b4492ed,
93 0x3ff8ace5422aa0db,
94 0x3ff8f1ae99157736,
95 0x3ff93737b0cdc5e5,
96 0x3ff97d829fde4e50,
97 0x3ff9c49182a3f090,
98 0x3ffa0c667b5de565,
99 0x3ffa5503b23e255d,
100 0x3ffa9e6b5579fdbf,
101 0x3ffae89f995ad3ad,
102 0x3ffb33a2b84f15fb,
103 0x3ffb7f76f2fb5e47,
104 0x3ffbcc1e904bc1d2,
105 0x3ffc199bdd85529c,
106 0x3ffc67f12e57d14b,
107 0x3ffcb720dcef9069,
108 0x3ffd072d4a07897c,
109 0x3ffd5818dcfba487,
110 0x3ffda9e603db3285,
111 0x3ffdfc97337b9b5f,
112 0x3ffe502ee78b3ff6,
113 0x3ffea4afa2a490da,
114 0x3ffefa1bee615a27,
115 0x3fff50765b6e4540,
116 0x3fffa7c1819e90d8,
117];
118
119#[inline]
149pub fn f_exp2f(x: f32) -> f32 {
150 let mut t = x.to_bits();
151
152 if (t & 0xffff) == 0 {
153 let k: i32 = (((t >> 23) & 0xff) as i32).wrapping_sub(127); if k >= 0 && k < 9 && (t << (9i32.wrapping_add(k))) == 0 {
156 let msk = (t as i32) >> 31;
158 let mut m: i32 = (((t & 0x7fffff) | (1 << 23)) >> (23 - k)) as i32;
159 m = (m ^ msk).wrapping_sub(msk).wrapping_add(127);
160 if m > 0 && m < 255 {
161 t = (m as u32).wrapping_shl(23);
162 return f32::from_bits(t);
163 } else if m <= 0 && m > -23 {
164 t = 1i32.wrapping_shl(22i32.wrapping_add(m) as u32) as u32;
165 return f32::from_bits(t);
166 }
167 }
168 }
169 let ux = t.wrapping_shl(1);
170
171 if ux >= 0x86000000u32 || ux < 0x65000000u32 {
172 if ux < 0x65000000u32 {
174 return 1.0 + x;
175 } if !(t >= 0xc3000000u32 && t < 0xc3150000u32) {
178 if ux >= 0xffu32 << 24 {
179 if ux > (0xffu32 << 24) {
181 return x + x;
182 } static IR: [f32; 2] = [f32::INFINITY, 0.];
184 return IR[(t >> 31) as usize]; }
186 if t >= 0xc3150000u32 {
187 let z = x;
189 let mut y = f_fmla(
190 z as f64 + 149.,
191 f64::from_bits(0x3690000000000000),
192 f64::from_bits(0x36a0000000000000),
193 );
194 y = y.max(f64::from_bits(0x3680000000000000));
195 return y as f32;
196 }
197 let r = black_box(f64::from_bits(0x47e0000000000000))
199 * black_box(f64::from_bits(0x47e0000000000000));
200 return r as f32;
201 }
202 }
203
204 if ux <= 0x7a000000u32 {
205 const C: [u64; 6] = [
214 0x3fe62e42fefa39f3,
215 0x3fcebfbdff82c57b,
216 0x3fac6b08d6f2d7aa,
217 0x3f83b2ab6fc92f5d,
218 0x3f55d897cfe27125,
219 0x3f243090e61e6af1,
220 ];
221 let xd = x as f64;
222 let p = f_polyeval6(
223 xd,
224 f64::from_bits(C[0]),
225 f64::from_bits(C[1]),
226 f64::from_bits(C[2]),
227 f64::from_bits(C[3]),
228 f64::from_bits(C[4]),
229 f64::from_bits(C[5]),
230 );
231 return f_fmla(p, xd, 1.) as f32;
232 }
233
234 let x_d = x as f64;
235 let kf = (x_d * 64.).round_finite();
236 let k = unsafe { kf.to_int_unchecked::<i32>() }; let dx = f_fmla(f64::from_bits(0xbf90000000000000), kf, x_d);
239
240 const TABLE_BITS: u32 = 6;
241 const TABLE_MASK: u64 = (1u64 << TABLE_BITS) - 1;
242
243 let exp_hi: i64 = ((k >> TABLE_BITS) as i64).wrapping_shl(52);
246
247 let mh_bits = (EXP2F_TABLE[((k as u64) & TABLE_MASK) as usize] as i64).wrapping_add(exp_hi);
250 let mh = f64::from_bits(mh_bits as u64);
251
252 const C: [u64; 5] = [
256 0x3fe62e42fefa39ef,
257 0x3fcebfbdff8131c4,
258 0x3fac6b08d7061695,
259 0x3f83b2b1bee74b2a,
260 0x3f55d88091198529,
261 ];
262 let dx_sq = dx * dx;
263 let c1 = f_fmla(dx, f64::from_bits(C[0]), 1.0);
264 let c2 = f_fmla(dx, f64::from_bits(C[2]), f64::from_bits(C[1]));
265 let c3 = f_fmla(dx, f64::from_bits(C[4]), f64::from_bits(C[3]));
266 let p = f_fmla(dx_sq, c3, c2);
267 f_fmla(p, dx_sq * mh, c1 * mh) as f32
272}
273
274#[inline]
275pub(crate) fn dirty_exp2f(d: f32) -> f32 {
276 let redux = f32::from_bits(0x4b400000) / TBLSIZE as f32;
277
278 let ui = f32::to_bits(d + redux);
279 let mut i0 = ui;
280 i0 = i0.wrapping_add(TBLSIZE as u32 / 2);
281 let k = i0 / TBLSIZE as u32;
282 i0 &= TBLSIZE as u32 - 1;
283 let mut uf = f32::from_bits(ui);
284 uf -= redux;
285
286 let item = EXP2FT.0[i0 as usize];
287 let z0: f32 = f32::from_bits(item.0);
288
289 let f: f32 = d - uf;
290
291 let mut u = 0.24022650695908768;
292 u = f_fmlaf(u, f, 0.69314718055994973);
293 u *= f;
294
295 let i2 = pow2if(k as i32);
296 f_fmlaf(u, z0, z0) * i2
297}
298
299#[cfg(test)]
300mod tests {
301 use super::*;
302
303 #[test]
304 fn test_exp2f() {
305 assert_eq!(f_exp2f(1. / 64.), 1.0108893);
306 assert_eq!(f_exp2f(2.0), 4.0);
307 assert_eq!(f_exp2f(3.0), 8.0);
308 assert_eq!(f_exp2f(4.0), 16.0);
309 assert_eq!(f_exp2f(10.0), 1024.0);
310 assert_eq!(f_exp2f(-10.0), 0.0009765625);
311 assert!(f_exp2f(f32::NAN).is_nan());
312 assert_eq!(f_exp2f(-0.35), 0.7845841);
313 assert_eq!(f_exp2f(0.35), 1.2745606);
314 assert!(f_exp2f(f32::INFINITY).is_infinite());
315 assert_eq!(f_exp2f(f32::NEG_INFINITY), 0.0);
316 }
317
318 #[test]
319 fn test_dirty_exp2f() {
320 assert!((dirty_exp2f(0.35f32) - 0.35f32.exp2()).abs() < 1e-5);
321 assert!((dirty_exp2f(-0.6f32) - (-0.6f32).exp2()).abs() < 1e-5);
322 }
323}