1use crate::common::{dd_fmla, is_integerf};
30use crate::double_double::DoubleDouble;
31use crate::round_ties_even::RoundTiesEven;
32use std::hint::black_box;
33
34#[cold]
35#[inline(never)]
36fn as_compoundf_special(x: f32, y: f32) -> f32 {
37 let nx = x.to_bits();
38 let ny = y.to_bits();
39 let ax: u32 = nx.wrapping_shl(1);
40 let ay: u32 = ny.wrapping_shl(1);
41
42 if ax == 0 || ay == 0 {
43 if ax == 0 {
45 return if y.is_nan() { x + y } else { 1.0 };
47 }
48
49 if ay == 0 {
50 if x.is_nan() {
52 return x + y;
53 } return if x < -1.0 {
55 f32::NAN } else {
57 1.0
58 }; }
60 }
61
62 let mone = (-1.0f32).to_bits();
63 if ay >= 0xffu32 << 24 {
64 if ax > 0xffu32 << 24 {
67 return x + y;
68 } if ay == 0xffu32 << 24 {
70 if nx > mone {
72 return f32::NAN;
73 } let sy = ny >> 31; if nx == mone {
76 return if sy == 0 {
77 0.0 } else {
79 f32::INFINITY };
81 }
82 if x < 0.0 {
83 return if sy == 0 { 0.0 } else { f32::INFINITY };
84 }
85 if x > 0.0 {
86 return if sy != 0 { 0.0 } else { f32::INFINITY };
87 }
88 return 1.0;
89 }
90 return x + y; }
92
93 if nx >= mone || nx >= 0xffu32 << 23 {
94 if ax == 0xffu32 << 24 {
96 if (nx >> 31) != 0 {
98 return f32::NAN;
99 } return if (ny >> 31) != 0 { 1.0 / x } else { x };
102 }
103 if ax > 0xffu32 << 24 {
104 return x + y;
105 } if nx > mone {
107 return f32::NAN; }
109 return if (ny >> 31) != 0 {
111 f32::INFINITY
113 } else {
114 0.0
116 };
117 }
118 0.0
119}
120
121#[inline]
122pub(crate) fn log2p1_polyeval_1(z: f64) -> f64 {
123 const P: [u64; 8] = [
127 0x0000000000000000,
128 0x3ff71547652b82fe,
129 0xbfe71547652b8d11,
130 0x3fdec709dc3a5014,
131 0xbfd715475b144983,
132 0x3fd2776c3fda300e,
133 0xbfcec990162358ce,
134 0x3fca645337c29e27,
135 ];
136
137 let z2 = z * z;
138 let mut c5 = dd_fmla(f64::from_bits(P[6]), z, f64::from_bits(P[5]));
139 let c3 = dd_fmla(f64::from_bits(P[4]), z, f64::from_bits(P[3]));
140 let mut c1 = dd_fmla(f64::from_bits(P[2]), z, f64::from_bits(P[1]));
141 let z4 = z2 * z2;
142 c5 = dd_fmla(f64::from_bits(P[7]), z2, c5);
143 c1 = dd_fmla(c3, z2, c1);
144 c1 = dd_fmla(c5, z4, c1);
145 z * c1
146}
147
148pub(crate) static LOG2P1_COMPOUNDF_INV: [u64; 46] = [
150 0x3ff6800000000000,
151 0x3ff6000000000000,
152 0x3ff5800000000000,
153 0x3ff5000000000000,
154 0x3ff4c00000000000,
155 0x3ff4400000000000,
156 0x3ff4000000000000,
157 0x3ff3800000000000,
158 0x3ff3400000000000,
159 0x3ff2c00000000000,
160 0x3ff2800000000000,
161 0x3ff2000000000000,
162 0x3ff1c00000000000,
163 0x3ff1800000000000,
164 0x3ff1400000000000,
165 0x3ff1000000000000,
166 0x3ff0c00000000000,
167 0x3ff0800000000000,
168 0x3ff0000000000000,
169 0x3ff0000000000000,
170 0x3fef400000000000,
171 0x3feec00000000000,
172 0x3fee400000000000,
173 0x3fee000000000000,
174 0x3fed800000000000,
175 0x3fed000000000000,
176 0x3feca00000000000,
177 0x3fec400000000000,
178 0x3febe00000000000,
179 0x3feb800000000000,
180 0x3feb200000000000,
181 0x3feac00000000000,
182 0x3fea800000000000,
183 0x3fea200000000000,
184 0x3fe9c00000000000,
185 0x3fe9800000000000,
186 0x3fe9200000000000,
187 0x3fe8c00000000000,
188 0x3fe8800000000000,
189 0x3fe8400000000000,
190 0x3fe8000000000000,
191 0x3fe7c00000000000,
192 0x3fe7600000000000,
193 0x3fe7200000000000,
194 0x3fe6e00000000000,
195 0x3fe6a00000000000,
196];
197
198pub(crate) static LOG2P1_COMPOUNDF_LOG2_INV: [(u64, u64); 46] = [
202 (0x3c68f3673ffdd785, 0xbfdf7a8568cb06cf),
203 (0x3c1c141e66faaaad, 0xbfdd6753e032ea0f),
204 (0x3c76fae441c09d76, 0xbfdb47ebf73882a1),
205 (0x3c72d352bea51e59, 0xbfd91bba891f1709),
206 (0xbc69575b04fa6fbd, 0xbfd800a563161c54),
207 (0x3c7817fd3b7d7e5d, 0xbfd5c01a39fbd688),
208 (0x3c1b6d40900b2502, 0xbfd49a784bcd1b8b),
209 (0x3c7f6e91ad16ecff, 0xbfd24407ab0e073a),
210 (0x3c6a7b47d2c352d9, 0xbfd11307dad30b76),
211 (0x3c5b85a54d7ee2fd, 0xbfcd49ee4c325970),
212 (0x3c401ee1343fe7ca, 0xbfcacf5e2db4ec94),
213 (0x3c6817fd3b7d7e5d, 0xbfc5c01a39fbd688),
214 (0xbc4f51f2c075a74c, 0xbfc32ae9e278ae1a),
215 (0x3c6a7610e40bd6ab, 0xbfc08c588cda79e4),
216 (0xbc58ecb169b9465f, 0xbfbbc84240adabba),
217 (0xbc5f3314e0985116, 0xbfb663f6fac91316),
218 (0x3c530c22d15199b8, 0xbfb0eb389fa29f9b),
219 (0xbc389b03784b5be1, 0xbfa6bad3758efd87),
220 (0x0000000000000000, 0x0000000000000000),
221 (0x0000000000000000, 0x0000000000000000),
222 (0x3c3491f06c085bc2, 0x3fa184b8e4c56af8),
223 (0x3c0155660710eb2a, 0x3fad6ebd1f1febfe),
224 (0x3c2c141e66faaaad, 0x3fb4c560fe68af88),
225 (0x3c59ced1447e30ad, 0x3fb7d60496cfbb4c),
226 (0x3c592ce9636c90a0, 0x3fbe0b1ae8f2fd56),
227 (0xbc5696e2866c718e, 0x3fc22dadc2ab3497),
228 (0xbc61562d61af73f8, 0x3fc494f863b8df35),
229 (0xbc60798d1aa21694, 0x3fc7046031c79f85),
230 (0xbc6e95734abd2fcc, 0x3fc97c1cb13c7ec1),
231 (0x3c2bc0af7b82e7d7, 0x3fcbfc67a7fff4cc),
232 (0xbc6086fce864a1f6, 0x3fce857d3d361368),
233 (0xbc53d56efe4338fe, 0x3fd08bce0d95fa38),
234 (0x3c7c8d43e017579b, 0x3fd169c05363f158),
235 (0xbc50132ae5e417cd, 0x3fd2baa0c34be1ec),
236 (0xbc7c658d602e66b0, 0x3fd4106017c3eca3),
237 (0x3c7e393a16b94b52, 0x3fd4f6fbb2cec598),
238 (0x3c7ac9080333c605, 0x3fd6552b49986277),
239 (0x3c68f89e2eb553b2, 0x3fd7b89f02cf2aad),
240 (0x3c799aa6df8b7d83, 0x3fd8a8980abfbd32),
241 (0x3c7bca36fd02def0, 0x3fd99b072a96c6b2),
242 (0x3c5817fd3b7d7e5d, 0x3fda8ff971810a5e),
243 (0xbc501d98c3531027, 0x3fdb877c57b1b070),
244 (0x3c78a38b4175d665, 0x3fdcffae611ad12b),
245 (0x3c438c8946414c6a, 0x3fddfdd89d586e2b),
246 (0x3c76d261f1753e0b, 0x3fdefec61b011f85),
247 (0xbc87398fe685f171, 0x3fe0014332be0033),
248];
249
250#[inline]
253fn compoundf_expf_poly(z: f64) -> f64 {
254 const Q: [u64; 5] = [
257 0x3ff0000000000000,
258 0x3fe62e42fef6d01a,
259 0x3fcebfbdff7feeba,
260 0x3fac6b167e579bee,
261 0x3f83b2b3428d06de,
262 ];
263 let z2 = z * z;
264 let c3 = dd_fmla(f64::from_bits(Q[4]), z, f64::from_bits(Q[3]));
265 let c0 = dd_fmla(f64::from_bits(Q[1]), z, f64::from_bits(Q[0]));
266 let c2 = dd_fmla(c3, z, f64::from_bits(Q[2]));
267 dd_fmla(c2, z2, c0)
268}
269
270pub(crate) fn compoundf_log2p1_fast(x: f64) -> f64 {
271 let u = 1.0 + x;
276 let mut v = u.to_bits();
283 let m: u64 = v & 0xfffffffffffffu64;
284 let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
285 v = v.wrapping_sub((e << 52) as u64);
287 let t = f64::from_bits(v);
288 v = (f64::from_bits(v) + 2.0).to_bits(); let i = (v >> 45) as i32 - 0x2002d; let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
293 let z = dd_fmla(r, t, -1.0); let p = log2p1_polyeval_1(z);
296 e as f64 + (f64::from_bits(LOG2P1_COMPOUNDF_LOG2_INV[i as usize].1) + p)
298}
299
300pub(crate) static COMPOUNDF_EXP2_T: [u64; 33] = [
301 0xbfe0000000000000,
302 0xbfde000000000000,
303 0xbfdc000000000000,
304 0xbfda000000000000,
305 0xbfd8000000000000,
306 0xbfd6000000000000,
307 0xbfd4000000000000,
308 0xbfd2000000000000,
309 0xbfd0000000000000,
310 0xbfcc000000000000,
311 0xbfc8000000000000,
312 0xbfc4000000000000,
313 0xbfc0000000000000,
314 0xbfb8000000000000,
315 0xbfb0000000000000,
316 0xbfa0000000000000,
317 0x0000000000000000,
318 0x3fa0000000000000,
319 0x3fb0000000000000,
320 0x3fb8000000000000,
321 0x3fc0000000000000,
322 0x3fc4000000000000,
323 0x3fc8000000000000,
324 0x3fcc000000000000,
325 0x3fd0000000000000,
326 0x3fd2000000000000,
327 0x3fd4000000000000,
328 0x3fd6000000000000,
329 0x3fd8000000000000,
330 0x3fda000000000000,
331 0x3fdc000000000000,
332 0x3fde000000000000,
333 0x3fe0000000000000,
334];
335
336pub(crate) static COMPOUNDF_EXP2_U: [(u64, u64); 33] = [
340 (0xbc8bdd3413b26456, 0x3fe6a09e667f3bcd),
341 (0xbc716e4786887a99, 0x3fe71f75e8ec5f74),
342 (0xbc741577ee04992f, 0x3fe7a11473eb0187),
343 (0xbc8d4c1dd41532d8, 0x3fe82589994cce13),
344 (0x3c86e9f156864b27, 0x3fe8ace5422aa0db),
345 (0xbc575fc781b57ebc, 0x3fe93737b0cdc5e5),
346 (0x3c6c7c46b071f2be, 0x3fe9c49182a3f090),
347 (0xbc8d2f6edb8d41e1, 0x3fea5503b23e255d),
348 (0x3c87a1cd345dcc81, 0x3feae89f995ad3ad),
349 (0xbc65584f7e54ac3b, 0x3feb7f76f2fb5e47),
350 (0x3c711065895048dd, 0x3fec199bdd85529c),
351 (0x3c6503cbd1e949db, 0x3fecb720dcef9069),
352 (0x3c72ed02d75b3707, 0x3fed5818dcfba487),
353 (0xbc81a5cd4f184b5c, 0x3fedfc97337b9b5f),
354 (0xbc8e9c23179c2893, 0x3feea4afa2a490da),
355 (0x3c89d3e12dd8a18b, 0x3fef50765b6e4540),
356 (0x0000000000000000, 0x3ff0000000000000),
357 (0x3c8d73e2a475b465, 0x3ff059b0d3158574),
358 (0x3c98a62e4adc610b, 0x3ff0b5586cf9890f),
359 (0xbc96c51039449b3a, 0x3ff11301d0125b51),
360 (0xbc819041b9d78a76, 0x3ff172b83c7d517b),
361 (0x3c9e016e00a2643c, 0x3ff1d4873168b9aa),
362 (0x3c99b07eb6c70573, 0x3ff2387a6e756238),
363 (0x3c8612e8afad1255, 0x3ff29e9df51fdee1),
364 (0x3c86f46ad23182e4, 0x3ff306fe0a31b715),
365 (0xbc963aeabf42eae2, 0x3ff371a7373aa9cb),
366 (0x3c8ada0911f09ebc, 0x3ff3dea64c123422),
367 (0x3c489b7a04ef80d0, 0x3ff44e086061892d),
368 (0x3c7d4397afec42e2, 0x3ff4bfdad5362a27),
369 (0xbc807abe1db13cad, 0x3ff5342b569d4f82),
370 (0x3c96324c054647ad, 0x3ff5ab07dd485429),
371 (0xbc9383c17e40b497, 0x3ff6247eb03a5585),
372 (0xbc9bdd3413b26456, 0x3ff6a09e667f3bcd),
373];
374
375fn exp2_fast(t: f64) -> f64 {
380 let k = t.round_ties_even_finite(); let mut r = t - k; let mut v: u64 = (3.015625 + r).to_bits(); let i: i32 = (v >> 46) as i32 - 0x10010; r -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); v = (f64::from_bits(COMPOUNDF_EXP2_U[i as usize].1) * compoundf_expf_poly(r)).to_bits();
389 let mut err: u64 = 0x3d61d00000000000; v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) }; if f64::from_bits(v) < f64::from_bits(0x38100000000008e2) {
402 return -1.0;
403 }
404 err = unsafe { err.wrapping_add((k.to_int_unchecked::<i64>() << 52) as u64) }; let lb = (f64::from_bits(v) - f64::from_bits(err)) as f32;
406 let rb = (f64::from_bits(v) + f64::from_bits(err)) as f32;
407 if lb != rb {
408 return -1.0;
409 } f64::from_bits(v)
412}
413
414pub(crate) static LOG2P1_SCALE: [u64; 158] = [
417 0x41c0000000000000,
418 0x41b0000000000000,
419 0x41a0000000000000,
420 0x4190000000000000,
421 0x4180000000000000,
422 0x4170000000000000,
423 0x4160000000000000,
424 0x4150000000000000,
425 0x4140000000000000,
426 0x4130000000000000,
427 0x4120000000000000,
428 0x4110000000000000,
429 0x4100000000000000,
430 0x40f0000000000000,
431 0x40e0000000000000,
432 0x40d0000000000000,
433 0x40c0000000000000,
434 0x40b0000000000000,
435 0x40a0000000000000,
436 0x4090000000000000,
437 0x4080000000000000,
438 0x4070000000000000,
439 0x4060000000000000,
440 0x4050000000000000,
441 0x4040000000000000,
442 0x4030000000000000,
443 0x4020000000000000,
444 0x4010000000000000,
445 0x4000000000000000,
446 0x3ff0000000000000,
447 0x3fe0000000000000,
448 0x3fd0000000000000,
449 0x3fc0000000000000,
450 0x3fb0000000000000,
451 0x3fa0000000000000,
452 0x3f90000000000000,
453 0x3f80000000000000,
454 0x3f70000000000000,
455 0x3f60000000000000,
456 0x3f50000000000000,
457 0x3f40000000000000,
458 0x3f30000000000000,
459 0x3f20000000000000,
460 0x3f10000000000000,
461 0x3f00000000000000,
462 0x3ef0000000000000,
463 0x3ee0000000000000,
464 0x3ed0000000000000,
465 0x3ec0000000000000,
466 0x3eb0000000000000,
467 0x3ea0000000000000,
468 0x3e90000000000000,
469 0x3e80000000000000,
470 0x3e70000000000000,
471 0x3e60000000000000,
472 0x3e50000000000000,
473 0x3e40000000000000,
474 0x3e30000000000000,
475 0x3e20000000000000,
476 0x3e10000000000000,
477 0x3e00000000000000,
478 0x3df0000000000000,
479 0x3de0000000000000,
480 0x3dd0000000000000,
481 0x3dc0000000000000,
482 0x3db0000000000000,
483 0x3da0000000000000,
484 0x3d90000000000000,
485 0x3d80000000000000,
486 0x3d70000000000000,
487 0x3d60000000000000,
488 0x3d50000000000000,
489 0x3d40000000000000,
490 0x3d30000000000000,
491 0x3d20000000000000,
492 0x3d10000000000000,
493 0x3d00000000000000,
494 0x3cf0000000000000,
495 0x3ce0000000000000,
496 0x3cd0000000000000,
497 0x3cc0000000000000,
498 0x3cb0000000000000,
499 0x3ca0000000000000,
500 0x3c90000000000000,
501 0x3c80000000000000,
502 0x3c70000000000000,
503 0x3c60000000000000,
504 0x3c50000000000000,
505 0x3c40000000000000,
506 0x3c30000000000000,
507 0x3c20000000000000,
508 0x3c10000000000000,
509 0x3c00000000000000,
510 0x3bf0000000000000,
511 0x3be0000000000000,
512 0x3bd0000000000000,
513 0x3bc0000000000000,
514 0x3bb0000000000000,
515 0x3ba0000000000000,
516 0x3b90000000000000,
517 0x3b80000000000000,
518 0x3b70000000000000,
519 0x3b60000000000000,
520 0x3b50000000000000,
521 0x3b40000000000000,
522 0x3b30000000000000,
523 0x3b20000000000000,
524 0x3b10000000000000,
525 0x3b00000000000000,
526 0x3af0000000000000,
527 0x3ae0000000000000,
528 0x3ad0000000000000,
529 0x3ac0000000000000,
530 0x3ab0000000000000,
531 0x3aa0000000000000,
532 0x3a90000000000000,
533 0x3a80000000000000,
534 0x3a70000000000000,
535 0x3a60000000000000,
536 0x3a50000000000000,
537 0x3a40000000000000,
538 0x3a30000000000000,
539 0x3a20000000000000,
540 0x3a10000000000000,
541 0x3a00000000000000,
542 0x39f0000000000000,
543 0x39e0000000000000,
544 0x39d0000000000000,
545 0x39c0000000000000,
546 0x39b0000000000000,
547 0x39a0000000000000,
548 0x3990000000000000,
549 0x3980000000000000,
550 0x3970000000000000,
551 0x3960000000000000,
552 0x3950000000000000,
553 0x3940000000000000,
554 0x3930000000000000,
555 0x3920000000000000,
556 0x3910000000000000,
557 0x3900000000000000,
558 0x38f0000000000000,
559 0x38e0000000000000,
560 0x38d0000000000000,
561 0x38c0000000000000,
562 0x38b0000000000000,
563 0x38a0000000000000,
564 0x3890000000000000,
565 0x3880000000000000,
566 0x3870000000000000,
567 0x3860000000000000,
568 0x3850000000000000,
569 0x3840000000000000,
570 0x3830000000000000,
571 0x3820000000000000,
572 0x3810000000000000,
573 0x3800000000000000,
574 0x37f0000000000000,
575];
576
577static LOG2P1_LOG2_POLY: [u64; 18] = [
589 0x3ff71547652b82fe,
590 0x3c7777d0ffda0d80,
591 0xbfe71547652b82fe,
592 0xbc6777d0fd20b49c,
593 0x3fdec709dc3a03fd,
594 0x3c7d27f05171b74a,
595 0xbfd71547652b82fe,
596 0xbc57814e70b828b0,
597 0x3fd2776c50ef9bfe,
598 0x3c7e4f63e12bff83,
599 0xbfcec709dc3a03f4,
600 0x3fca61762a7adecc,
601 0xbfc71547652d8849,
602 0x3fc484b13d7e7029,
603 0xbfc2776c1b2a40fd,
604 0x3fc0c9a80f9b7c1c,
605 0xbfbecc6801121200,
606 0x3fbc6e4b91fd10e5,
607];
608
609fn log2_poly2(z: DoubleDouble) -> DoubleDouble {
610 let mut h = f64::from_bits(LOG2P1_LOG2_POLY[4 + 13]); for i in 7..=12 {
617 h = dd_fmla(z.hi, z.hi, f64::from_bits(LOG2P1_LOG2_POLY[4 + i]));
618 }
619
620 let mut v = DoubleDouble::quick_mult_f64(z, h);
621 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[10]));
622 v.hi = t.hi;
623 v.lo += t.lo;
624
625 v = DoubleDouble::quick_mult(v, z);
626
627 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[8]));
628 v.hi = t.hi;
629 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[9]);
630
631 v = DoubleDouble::quick_mult(v, z);
632
633 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[6]));
634 v.hi = t.hi;
635 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[7]);
636
637 v = DoubleDouble::quick_mult(v, z);
638
639 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[4]));
640 v.hi = t.hi;
641 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[5]);
642
643 v = DoubleDouble::quick_mult(v, z);
644
645 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[2]));
646 v.hi = t.hi;
647 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[3]);
648
649 v = DoubleDouble::quick_mult(v, z);
650
651 let t = DoubleDouble::from_exact_add(v.hi, f64::from_bits(LOG2P1_LOG2_POLY[0]));
652 v.hi = t.hi;
653 v.lo += t.lo + f64::from_bits(LOG2P1_LOG2_POLY[1]);
654
655 v = DoubleDouble::quick_mult(v, z);
656
657 v
658}
659
660pub(crate) fn compoundf_log2p1_accurate(x: f64) -> DoubleDouble {
665 let mut v_dd = if 1.0 >= x {
666 if (x as f32).abs() >= f32::from_bits(0x25000000) {
668 DoubleDouble::from_exact_add(1.0, x)
669 } else {
670 DoubleDouble::new(x, 1.0)
671 }
672 } else {
673 DoubleDouble::from_exact_add(x, 1.0)
675 };
676
677 let mut v = v_dd.hi.to_bits();
679 let m = v & 0xfffffffffffffu64;
680 let e: i64 = (v >> 52) as i64 - 0x3ff + (m >= 0x6a09e667f3bcdu64) as i64;
681
682 let scale = f64::from_bits(LOG2P1_SCALE[e.wrapping_add(29) as usize]);
683 v_dd.hi *= scale;
684 v_dd.lo *= scale;
685
686 v = (2.0 + v_dd.hi).to_bits(); let i: i32 = (v >> 45) as i32 - 0x2002d; let r = f64::from_bits(LOG2P1_COMPOUNDF_INV[i as usize]);
693 let mut z_dd = DoubleDouble::new(r * v_dd.lo, dd_fmla(r, v_dd.hi, -1.0)); z_dd = DoubleDouble::from_exact_add(z_dd.hi, z_dd.lo);
698 let log_p = log2_poly2(z_dd);
708 let log2_inv = LOG2P1_COMPOUNDF_LOG2_INV[i as usize];
714 v_dd = DoubleDouble::from_exact_add(e as f64, f64::from_bits(log2_inv.1));
715 v_dd.lo += f64::from_bits(log2_inv.0);
716 let mut p = DoubleDouble::from_exact_add(v_dd.hi, log_p.hi);
717 p.lo += v_dd.lo + log_p.lo;
718 p
719}
720
721pub(crate) fn compoundf_exp2_poly2(z: DoubleDouble) -> DoubleDouble {
722 static Q2: [u64; 12] = [
726 0x3ff0000000000000,
727 0x3fe62e42fefa39ef,
728 0x3c7abc9d45534d06,
729 0x3fcebfbdff82c58f,
730 0xbc65e4383cf9ddf7,
731 0x3fac6b08d704a0c0,
732 0xbc46cbc55586c8f1,
733 0x3f83b2ab6fba4e77,
734 0x3f55d87fe789aec5,
735 0x3f2430912f879daa,
736 0x3eeffcc774b2367a,
737 0x3eb62c017b9bdfe6,
738 ];
739 let h2 = z.hi * z.hi;
740 let c7 = dd_fmla(f64::from_bits(Q2[11]), z.hi, f64::from_bits(Q2[10]));
741 let mut c5 = dd_fmla(f64::from_bits(Q2[9]), z.hi, f64::from_bits(Q2[8]));
742 c5 = dd_fmla(c7, h2, c5);
743 let z_vqh = c5 * z.hi;
745 let mut q = DoubleDouble::from_exact_add(f64::from_bits(Q2[7]), z_vqh);
746 q = DoubleDouble::quick_mult(q, z);
748 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[5]), q.hi);
750 q.hi = t.hi;
751 q.lo += t.lo + f64::from_bits(Q2[6]);
752 q = DoubleDouble::quick_mult(q, z);
754 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[3]), q.hi);
755 q.hi = t.hi;
756 q.lo += t.lo + f64::from_bits(Q2[4]);
757 q = DoubleDouble::quick_mult(q, z);
759 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[1]), q.hi);
760 q.hi = t.hi;
761 q.lo += t.lo + f64::from_bits(Q2[2]);
762 q = DoubleDouble::quick_mult(q, z);
764 let t = DoubleDouble::from_exact_add(f64::from_bits(Q2[0]), q.hi);
765 q.hi = t.hi;
766 q.lo += t.lo;
767 q
768}
769
770fn compoundf_exp2_accurate(x_dd: DoubleDouble, x: f32, y: f32) -> f32 {
776 if y == 1.0 {
777 let res = 1.0 + x;
778 return res;
779 }
780 let k = x_dd.hi.round_ties_even_finite(); if k == 0. && x_dd.hi.abs() <= f64::from_bits(0x3e6715476af0d4c8) {
784 return (1.0 + x_dd.hi * 0.5) as f32;
789 }
790
791 let r = x_dd.hi - k; let mut v_dd = DoubleDouble::from_exact_add(r, x_dd.lo);
794 let mut v = (3.015625 + v_dd.hi).to_bits(); let i: i32 = ((v >> 46) as i32).wrapping_sub(0x10010); v_dd.hi -= f64::from_bits(COMPOUNDF_EXP2_T[i as usize]); v_dd = DoubleDouble::from_exact_add(v_dd.hi, v_dd.lo);
803 let q = compoundf_exp2_poly2(v_dd);
804
805 let exp2u = DoubleDouble::from_bit_pair(COMPOUNDF_EXP2_U[i as usize]);
808 let mut q = DoubleDouble::quick_mult(exp2u, q);
809 q = DoubleDouble::from_exact_add(q.hi, q.lo);
810 let mut w = q.hi.to_bits();
834 if ((w.wrapping_add(1)) & 0xfffffffu64) <= 2 {
835 static ERR: [u64; 2] = [0x3abb100000000000, 0x3a2d800000000000];
836 let small: bool = k == 0. && i == 16 && x_dd.hi <= f64::from_bits(0x3f60000000000000);
837 let err = f64::from_bits(ERR[small as usize]);
838
839 w = (q.hi + (q.lo + err)).to_bits();
840 w = unsafe { w.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
841 }
842
843 v = (q.hi + q.lo).to_bits();
846 if (w.wrapping_shl(36)) == 0 && f64::from_bits(v) == q.hi && q.lo != 0. {
849 v = v.wrapping_add((if q.lo > 0. { 1i64 } else { -1i64 }) as u64); }
851 v = unsafe { v.wrapping_add(k.to_int_unchecked::<i64>().wrapping_shl(52) as u64) };
852 f64::from_bits(v) as f32
854}
855
856#[cold]
861#[inline(never)]
862fn compoundf_accurate(x: f32, y: f32) -> f32 {
863 let mut v = compoundf_log2p1_accurate(x as f64);
864 v = DoubleDouble::quick_mult_f64(v, y as f64);
868 compoundf_exp2_accurate(v, x, y)
878}
879
880#[inline]
884pub fn f_compoundf(x: f32, y: f32) -> f32 {
885 let mone = (-1.0f32).to_bits();
896 let nx = x.to_bits();
897 let ny = y.to_bits();
898 if nx >= mone {
899 return as_compoundf_special(x, y);
900 } let ax: u32 = nx.wrapping_shl(1);
904 let ay: u32 = ny.wrapping_shl(1);
905 if ax == 0 || ax >= 0xffu32 << 24 || ay == 0 || ay >= 0xffu32 << 24 {
906 return as_compoundf_special(x, y);
907 } if is_integerf(y) && ay <= 0x83000000u32 && ax <= 0xbefffffeu32 {
911 if ax <= 0x62000000u32 {
912 return 1.0 + y * x;
913 } let mut s = x as f64 + 1.;
915 let mut iter_count = unsafe { y.abs().to_int_unchecked::<usize>() };
916
917 let mut acc = if iter_count % 2 != 0 { s } else { 1. };
919
920 while {
921 iter_count >>= 1;
922 iter_count
923 } != 0
924 {
925 s = s * s;
926 if iter_count % 2 != 0 {
927 acc *= s;
928 }
929 }
930
931 let dz = if y.is_sign_negative() { 1. / acc } else { acc };
932 return dz as f32;
933 }
934
935 let xd = x as f64;
936 let yd = y as f64;
937 let tx = xd.to_bits();
938 let ty = yd.to_bits();
939
940 let l: f64 = compoundf_log2p1_fast(f64::from_bits(tx));
941
942 let t: u64 = (l * f64::from_bits(ty)).to_bits();
946 if (t.wrapping_shl(1)) >= (0x406u64 << 53) {
952 if t >= 0x3018bu64 << 46 {
954 return black_box(f32::from_bits(0x00800000)) * black_box(f32::from_bits(0x00800000));
956 } else if (t >> 63) == 0 {
957 return black_box(f32::from_bits(0x7e800000)) * black_box(f32::from_bits(0x7e800000));
959 }
960 }
961
962 if (t.wrapping_shl(1)) <= 0x3e6715476ba97f14u64 {
967 return if (t >> 63) != 0 {
968 black_box(1.0) - black_box(f32::from_bits(0x33000000))
969 } else {
970 black_box(1.0) + black_box(f32::from_bits(0x33000000))
971 };
972 }
973
974 let res = exp2_fast(f64::from_bits(t));
975 if res != -1.0 {
976 return res as f32;
977 }
978 compoundf_accurate(x, y)
979}
980
981#[cfg(test)]
982mod tests {
983 use super::*;
984
985 #[test]
986 fn test_compoundf() {
987 assert_eq!(
988 f_compoundf(
989 0.000000000000000000000000000000000000011754944,
990 -170502050000000000000000000000000000000.
991 ),
992 1.
993 );
994 assert_eq!(f_compoundf(1.235, 1.432), 3.1634824);
995 assert_eq!(f_compoundf(2., 3.0), 27.);
996 assert!(f_compoundf(-2., 5.0).is_nan());
997 assert_eq!(f_compoundf(1., f32::INFINITY), f32::INFINITY);
998 assert_eq!(f_compoundf(1., f32::NEG_INFINITY), 0.0);
999 }
1000}