1use crate::bessel::i0::bessel_rsqrt_hard;
30use crate::bessel::i0_exp;
31use crate::common::f_fmla;
32use crate::double_double::DoubleDouble;
33use crate::dyadic_float::{DyadicFloat128, DyadicSign};
34use crate::exponents::rational128_exp;
35
36pub fn f_i2(x: f64) -> f64 {
38 let ux = x.to_bits().wrapping_shl(1);
39
40 if ux >= 0x7ffu64 << 53 || ux == 0 {
41 if ux == 0 {
43 return 0.;
45 }
46 if x.is_infinite() {
47 return f64::INFINITY;
48 }
49 return x + f64::NAN; }
51
52 let xb = x.to_bits() & 0x7fff_ffff_ffff_ffffu64;
53
54 if xb < 0x401f000000000000u64 {
55 if xb <= 0x3cb0000000000000u64 {
57 const R: f64 = 1. / 8.;
60 let x2 = x * x * R;
61 return x2;
62 }
63 return i2_small(f64::from_bits(xb));
64 }
65
66 if xb >= 0x40864feaeefb23b8 {
67 return f64::INFINITY;
69 }
70
71 i2_asympt(f64::from_bits(xb))
72}
73
74#[inline]
95fn i2_small(x: f64) -> f64 {
96 const P: [(u64, u64); 12] = [
97 (0x0000000000000000, 0x3fc0000000000000),
98 (0x3c247833fda9de9a, 0x3f8387c6e72a1b5f),
99 (0xbbccaf0be91261a6, 0x3f30ba88efff56fa),
100 (0x3b57c911bfebe1d7, 0x3ecc62e53d061300),
101 (0x3af3b963f26a3d05, 0x3e5bb090327a14e1),
102 (0x3a898bff9d40e030, 0x3de0d29c3d37e5b5),
103 (0xb9f2f63c80d377db, 0x3d5a9e365f1bf6e0),
104 (0xb965e6d78e1c2b65, 0x3ccbf7ef0929b813),
105 (0xb8da83d7d40e7310, 0x3c33737520046f4d),
106 (0xb83f811d5aa3f36e, 0x3b91506558dab318),
107 (0xb78e601bf5c998c3, 0x3ae2013b3e858bd1),
108 (0xb6c8185c51734ed8, 0x3a20cc277a5051ba),
109 ];
110 let x_sqr = DoubleDouble::from_exact_mult(x, x);
111 let x2 = x_sqr * x_sqr;
112 let x4 = x2 * x2;
113 let x8 = x4 * x4;
114
115 let e0 = DoubleDouble::mul_add_f64(
116 x_sqr,
117 DoubleDouble::from_bit_pair(P[1]),
118 f64::from_bits(0x3fc0000000000000),
119 );
120 let e1 = DoubleDouble::mul_add(
121 x_sqr,
122 DoubleDouble::from_bit_pair(P[3]),
123 DoubleDouble::from_bit_pair(P[2]),
124 );
125 let e2 = DoubleDouble::mul_add(
126 x_sqr,
127 DoubleDouble::from_bit_pair(P[5]),
128 DoubleDouble::from_bit_pair(P[4]),
129 );
130 let e3 = DoubleDouble::mul_add(
131 x_sqr,
132 DoubleDouble::from_bit_pair(P[7]),
133 DoubleDouble::from_bit_pair(P[6]),
134 );
135 let e4 = DoubleDouble::mul_add(
136 x_sqr,
137 DoubleDouble::from_bit_pair(P[9]),
138 DoubleDouble::from_bit_pair(P[8]),
139 );
140 let e5 = DoubleDouble::mul_add(
141 x_sqr,
142 DoubleDouble::from_bit_pair(P[11]),
143 DoubleDouble::from_bit_pair(P[10]),
144 );
145
146 let f0 = DoubleDouble::mul_add(x2, e1, e0);
147 let f1 = DoubleDouble::mul_add(x2, e3, e2);
148 let f2 = DoubleDouble::mul_add(x2, e5, e4);
149
150 let g0 = DoubleDouble::mul_add(x4, f1, f0);
151
152 let p_num = DoubleDouble::mul_add(x8, f2, g0);
153
154 const Q: [(u64, u64); 12] = [
155 (0x0000000000000000, 0x3ff0000000000000),
156 (0xbc0ba42af56ed76b, 0xbf7cd8e6e2b39f60),
157 (0x3b90697aa005e598, 0x3efa0260394e1a3d),
158 (0xbb0c7ccde1f63c82, 0xbe6f1766ec64e492),
159 (0x3a63f18409bc336f, 0x3ddb80b6b5abad98),
160 (0xb9e0cd49f22132fe, 0xbd42ff9b55d553da),
161 (0xb934bfe64905d309, 0x3ca50814fa258ebc),
162 (0x38a1e35c2d6860f4, 0xbc02c4f2faca2195),
163 (0xb7ff39e268277e4e, 0x3b5aa545a2c1f16d),
164 (0xb71053f58545760c, 0xbaacde4c133d42d1),
165 (0xb68d0c2ccab0ae5b, 0x39f5a965b92b06bc),
166 (0xb5dc35bda16bee7b, 0xb931375b1c9cfbc7),
167 ];
168
169 let e0 = DoubleDouble::mul_add_f64(
170 x_sqr,
171 DoubleDouble::from_bit_pair(Q[1]),
172 f64::from_bits(0x3ff0000000000000),
173 );
174 let e1 = DoubleDouble::mul_add(
175 x_sqr,
176 DoubleDouble::from_bit_pair(Q[3]),
177 DoubleDouble::from_bit_pair(Q[2]),
178 );
179 let e2 = DoubleDouble::mul_add(
180 x_sqr,
181 DoubleDouble::from_bit_pair(Q[5]),
182 DoubleDouble::from_bit_pair(Q[4]),
183 );
184 let e3 = DoubleDouble::mul_add(
185 x_sqr,
186 DoubleDouble::from_bit_pair(Q[7]),
187 DoubleDouble::from_bit_pair(Q[6]),
188 );
189 let e4 = DoubleDouble::mul_add(
190 x_sqr,
191 DoubleDouble::from_bit_pair(Q[9]),
192 DoubleDouble::from_bit_pair(Q[8]),
193 );
194 let e5 = DoubleDouble::mul_add(
195 x_sqr,
196 DoubleDouble::from_bit_pair(Q[11]),
197 DoubleDouble::from_bit_pair(Q[10]),
198 );
199
200 let f0 = DoubleDouble::mul_add(x2, e1, e0);
201 let f1 = DoubleDouble::mul_add(x2, e3, e2);
202 let f2 = DoubleDouble::mul_add(x2, e5, e4);
203
204 let g0 = DoubleDouble::mul_add(x4, f1, f0);
205
206 let p_den = DoubleDouble::mul_add(x8, f2, g0);
207
208 let p = DoubleDouble::div(p_num, p_den);
209 DoubleDouble::quick_mult(p, x_sqr).to_f64()
210}
211
212#[inline]
232fn i2_asympt(x: f64) -> f64 {
233 let dx = x;
234 let recip = DoubleDouble::from_quick_recip(x);
235 const P: [(u64, u64); 12] = [
236 (0x3c718bb28ebc5f4e, 0x3fd9884533d43650),
237 (0x3c96e15a87b6e1d1, 0xc0350acc9e5cb0f9),
238 (0xbd20b212a79e08f5, 0x40809251af67598a),
239 (0xbd563b7397df3a54, 0xc0bfc09ede682c8b),
240 (0xbd5eb872cb057d91, 0x40f44253a9e1e1ab),
241 (0x3d7614735e566fc5, 0xc121cbcd96dc8765),
242 (0xbddc4f8df2010026, 0x4145a592e8ec74ad),
243 (0x3dea227617b678a7, 0xc161df96fb6a9df9),
244 (0x3e17c9690d906194, 0x41732c71397757f8),
245 (0x3e0638226ce0b938, 0xc178893fde0e6ed7),
246 (0xbe09d8dc4e7930ce, 0x417066abe24b31df),
247 (0xbde152007ee29e54, 0xc150531da3f31b16),
248 ];
249
250 let x2 = DoubleDouble::quick_mult(recip, recip);
251 let x4 = DoubleDouble::quick_mult(x2, x2);
252 let x8 = DoubleDouble::quick_mult(x4, x4);
253
254 let e0 = DoubleDouble::mul_add(
255 recip,
256 DoubleDouble::from_bit_pair(P[1]),
257 DoubleDouble::from_bit_pair(P[0]),
258 );
259 let e1 = DoubleDouble::mul_add(
260 recip,
261 DoubleDouble::from_bit_pair(P[3]),
262 DoubleDouble::from_bit_pair(P[2]),
263 );
264 let e2 = DoubleDouble::mul_add(
265 recip,
266 DoubleDouble::from_bit_pair(P[5]),
267 DoubleDouble::from_bit_pair(P[4]),
268 );
269 let e3 = DoubleDouble::mul_add(
270 recip,
271 DoubleDouble::from_bit_pair(P[7]),
272 DoubleDouble::from_bit_pair(P[6]),
273 );
274 let e4 = DoubleDouble::mul_add(
275 recip,
276 DoubleDouble::from_bit_pair(P[9]),
277 DoubleDouble::from_bit_pair(P[8]),
278 );
279 let e5 = DoubleDouble::mul_add(
280 recip,
281 DoubleDouble::from_bit_pair(P[11]),
282 DoubleDouble::from_bit_pair(P[10]),
283 );
284
285 let f0 = DoubleDouble::mul_add(x2, e1, e0);
286 let f1 = DoubleDouble::mul_add(x2, e3, e2);
287 let f2 = DoubleDouble::mul_add(x2, e5, e4);
288
289 let g0 = DoubleDouble::mul_add(x4, f1, f0);
290
291 let p_num = DoubleDouble::mul_add(x8, f2, g0);
292
293 const Q: [(u64, u64); 12] = [
294 (0x0000000000000000, 0x3ff0000000000000),
295 (0xbcd0d33e9e73b503, 0xc0496f5a09751d50),
296 (0x3d2f9c44a069dc4b, 0x40934427187ac370),
297 (0xbd69e2e5a3618381, 0xc0d19983f74fdf52),
298 (0x3d88c69a62ae8b44, 0x410524fcaa71e85a),
299 (0xbdc0345b806dd0bf, 0xc13120daf531b66b),
300 (0xbdd35875712fff6f, 0x4152943a4f9f1c7f),
301 (0xbdf8dd50e92553fd, 0xc169b83aeede08ea),
302 (0x3e0800ecaa77f79e, 0x41746c61554a08ce),
303 (0x3dd74fbc32c5f696, 0xc16ba2febd1932a3),
304 (0x3dc23eb2c943b539, 0x413574ae68b6b378),
305 (0xbd95d86c5c94cd65, 0xc104adac99eaa90c),
306 ];
307
308 let e0 = DoubleDouble::mul_add_f64(
309 recip,
310 DoubleDouble::from_bit_pair(Q[1]),
311 f64::from_bits(0x3ff0000000000000),
312 );
313 let e1 = DoubleDouble::mul_add(
314 recip,
315 DoubleDouble::from_bit_pair(Q[3]),
316 DoubleDouble::from_bit_pair(Q[2]),
317 );
318 let e2 = DoubleDouble::mul_add(
319 recip,
320 DoubleDouble::from_bit_pair(Q[5]),
321 DoubleDouble::from_bit_pair(Q[4]),
322 );
323 let e3 = DoubleDouble::mul_add(
324 recip,
325 DoubleDouble::from_bit_pair(Q[7]),
326 DoubleDouble::from_bit_pair(Q[6]),
327 );
328 let e4 = DoubleDouble::mul_add(
329 recip,
330 DoubleDouble::from_bit_pair(Q[9]),
331 DoubleDouble::from_bit_pair(Q[8]),
332 );
333 let e5 = DoubleDouble::mul_add(
334 recip,
335 DoubleDouble::from_bit_pair(Q[11]),
336 DoubleDouble::from_bit_pair(Q[10]),
337 );
338
339 let f0 = DoubleDouble::mul_add(x2, e1, e0);
340 let f1 = DoubleDouble::mul_add(x2, e3, e2);
341 let f2 = DoubleDouble::mul_add(x2, e5, e4);
342
343 let g0 = DoubleDouble::mul_add(x4, f1, f0);
344
345 let p_den = DoubleDouble::mul_add(x8, f2, g0);
346
347 let z = DoubleDouble::div(p_num, p_den);
348
349 let mut e = i0_exp(dx * 0.5);
350 e = DoubleDouble::from_exact_add(e.hi, e.lo);
351 let r_sqrt = DoubleDouble::from_rsqrt_fast(dx);
352 let r = DoubleDouble::quick_mult(z * r_sqrt * e, e);
353 let err = f_fmla(
354 r.hi,
355 f64::from_bits(0x3c40000000000000), f64::from_bits(0x3ba0000000000000), );
358 let up = r.hi + (r.lo + err);
359 let lb = r.hi + (r.lo - err);
360 if up == lb {
361 return r.to_f64();
362 }
363 i2_asympt_hard(x)
364}
365
366#[cold]
386#[inline(never)]
387fn i2_asympt_hard(x: f64) -> f64 {
388 static P: [DyadicFloat128; 16] = [
389 DyadicFloat128 {
390 sign: DyadicSign::Pos,
391 exponent: -129,
392 mantissa: 0xcc42299e_a1b28468_3bb16645_ba1dc793_u128,
393 },
394 DyadicFloat128 {
395 sign: DyadicSign::Neg,
396 exponent: -123,
397 mantissa: 0xe202abf7_de10e93f_2a2e6a0f_af69c788_u128,
398 },
399 DyadicFloat128 {
400 sign: DyadicSign::Pos,
401 exponent: -118,
402 mantissa: 0xf70296c3_ad33bde6_866cfd01_0e846cfc_u128,
403 },
404 DyadicFloat128 {
405 sign: DyadicSign::Neg,
406 exponent: -113,
407 mantissa: 0xa83df971_736c4e6c_1a35479b_ad6d9172_u128,
408 },
409 DyadicFloat128 {
410 sign: DyadicSign::Pos,
411 exponent: -109,
412 mantissa: 0x9baa2015_9c5ca461_0aff0b62_54a70fdb_u128,
413 },
414 DyadicFloat128 {
415 sign: DyadicSign::Neg,
416 exponent: -106,
417 mantissa: 0xc70af95d_f95d14ad_1094ea1b_e46b2d2f_u128,
418 },
419 DyadicFloat128 {
420 sign: DyadicSign::Pos,
421 exponent: -103,
422 mantissa: 0xa838fb48_e79fb706_642da604_6a73b4f8_u128,
423 },
424 DyadicFloat128 {
425 sign: DyadicSign::Neg,
426 exponent: -101,
427 mantissa: 0x8fe29f37_02b1e876_39e88664_1c8b3b5d_u128,
428 },
429 DyadicFloat128 {
430 sign: DyadicSign::Neg,
431 exponent: -100,
432 mantissa: 0xc8e9a474_0a03f93a_16d2e7a9_627eba4e_u128,
433 },
434 DyadicFloat128 {
435 sign: DyadicSign::Pos,
436 exponent: -95,
437 mantissa: 0x8807d1f6_6d646a08_8c7e8900_12d6a5ed_u128,
438 },
439 DyadicFloat128 {
440 sign: DyadicSign::Neg,
441 exponent: -93,
442 mantissa: 0xe5c25026_97518024_36878256_fd81c08f_u128,
443 },
444 DyadicFloat128 {
445 sign: DyadicSign::Pos,
446 exponent: -91,
447 mantissa: 0xeaa075f0_f5151bed_95ec612f_ab9834a7_u128,
448 },
449 DyadicFloat128 {
450 sign: DyadicSign::Neg,
451 exponent: -89,
452 mantissa: 0x9b267222_82d5c666_348d7d1d_0fedfba4_u128,
453 },
454 DyadicFloat128 {
455 sign: DyadicSign::Pos,
456 exponent: -88,
457 mantissa: 0x81b45c4c_3e828396_1d5bdede_869c3b84_u128,
458 },
459 DyadicFloat128 {
460 sign: DyadicSign::Neg,
461 exponent: -89,
462 mantissa: 0xf4495d43_4bc8dba6_42bdb5d6_c8ba2c9c_u128,
463 },
464 DyadicFloat128 {
465 sign: DyadicSign::Pos,
466 exponent: -90,
467 mantissa: 0xc9b29546_0c226270_bb428035_587b6d6a_u128,
468 },
469 ];
470 static Q: [DyadicFloat128; 16] = [
471 DyadicFloat128 {
472 sign: DyadicSign::Pos,
473 exponent: -127,
474 mantissa: 0x80000000_00000000_00000000_00000000_u128,
475 },
476 DyadicFloat128 {
477 sign: DyadicSign::Neg,
478 exponent: -121,
479 mantissa: 0x89e18bae_ca9629a1_26927ba2_fbdd66ab_u128,
480 },
481 DyadicFloat128 {
482 sign: DyadicSign::Pos,
483 exponent: -116,
484 mantissa: 0x92a90fc2_e905f634_4946e8a0_dd8e3874_u128,
485 },
486 DyadicFloat128 {
487 sign: DyadicSign::Neg,
488 exponent: -112,
489 mantissa: 0xc1742696_d29e3846_3e183737_29db8b68_u128,
490 },
491 DyadicFloat128 {
492 sign: DyadicSign::Pos,
493 exponent: -108,
494 mantissa: 0xabf61cc0_236a0e90_2572113d_fa339591_u128,
495 },
496 DyadicFloat128 {
497 sign: DyadicSign::Neg,
498 exponent: -105,
499 mantissa: 0xcff0fe90_dac1b08e_9a5740ae_b2984fc1_u128,
500 },
501 DyadicFloat128 {
502 sign: DyadicSign::Pos,
503 exponent: -102,
504 mantissa: 0x9ff36729_e407c538_cfcea3a7_63f39043_u128,
505 },
506 DyadicFloat128 {
507 sign: DyadicSign::Neg,
508 exponent: -101,
509 mantissa: 0xc86ff6a3_9b803a31_d385e9ea_83f9d751_u128,
510 },
511 DyadicFloat128 {
512 sign: DyadicSign::Neg,
513 exponent: -98,
514 mantissa: 0xb4a125b1_6cab70f3_0f314558_708843df_u128,
515 },
516 DyadicFloat128 {
517 sign: DyadicSign::Pos,
518 exponent: -94,
519 mantissa: 0x9670fd33_f83bcaa7_85cf2d82_c0bf8cd5_u128,
520 },
521 DyadicFloat128 {
522 sign: DyadicSign::Neg,
523 exponent: -92,
524 mantissa: 0xd70b4ea5_32fedb9d_78a3c047_05e650f4_u128,
525 },
526 DyadicFloat128 {
527 sign: DyadicSign::Pos,
528 exponent: -90,
529 mantissa: 0xb9c7904c_3f97b633_c2c0ad9b_ad573ede_u128,
530 },
531 DyadicFloat128 {
532 sign: DyadicSign::Neg,
533 exponent: -89,
534 mantissa: 0xc2023c21_5155e9fe_6fb17bb2_c865becd_u128,
535 },
536 DyadicFloat128 {
537 sign: DyadicSign::Pos,
538 exponent: -89,
539 mantissa: 0xd9400a5e_27c58803_22948cf3_6154ac49_u128,
540 },
541 DyadicFloat128 {
542 sign: DyadicSign::Neg,
543 exponent: -90,
544 mantissa: 0x87aa272d_6a9700b4_449a9db8_1a93b0ee_u128,
545 },
546 DyadicFloat128 {
547 sign: DyadicSign::Neg,
548 exponent: -93,
549 mantissa: 0xd1a86655_5b259611_dfc7affc_6ffb0e20_u128,
550 },
551 ];
552
553 let recip = DyadicFloat128::accurate_reciprocal(x);
554
555 let mut p_num = P[15];
556 for i in (0..15).rev() {
557 p_num = recip * p_num + P[i];
558 }
559 let mut p_den = Q[15];
560 for i in (0..15).rev() {
561 p_den = recip * p_den + Q[i];
562 }
563 let z = p_num * p_den.reciprocal();
564 let r_sqrt = bessel_rsqrt_hard(x, recip);
565 let f_exp = rational128_exp(x);
566 (z * r_sqrt * f_exp).fast_as_f64()
567}
568
569#[cfg(test)]
570mod tests {
571 use super::*;
572
573 #[test]
574 fn test_i2() {
575 assert_eq!(f_i2(7.750000000757874), 257.0034362785801);
576 assert_eq!(f_i2(7.482812501363189), 198.26765887136534);
577 assert_eq!(f_i2(-7.750000000757874), 257.0034362785801);
578 assert_eq!(f_i2(-7.482812501363189), 198.26765887136534);
579 assert!(f_i2(f64::NAN).is_nan());
580 assert_eq!(f_i2(f64::INFINITY), f64::INFINITY);
581 assert_eq!(f_i2(f64::NEG_INFINITY), f64::INFINITY);
582 assert_eq!(f_i2(0.01), 1.2500104166992188e-5);
583 assert_eq!(f_i2(-0.01), 1.2500104166992188e-5);
584 assert_eq!(f_i2(-9.01), 872.9250699638584);
585 assert_eq!(f_i2(9.01), 872.9250699638584);
586 }
587}