pxfm/bessel/
j1.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#![allow(clippy::excessive_precision)]
31
32use crate::bessel::alpha1::{
33    bessel_1_asympt_alpha, bessel_1_asympt_alpha_fast, bessel_1_asympt_alpha_hard,
34};
35use crate::bessel::beta1::{
36    bessel_1_asympt_beta, bessel_1_asympt_beta_fast, bessel_1_asympt_beta_hard,
37};
38use crate::bessel::i0::bessel_rsqrt_hard;
39use crate::bessel::j1_coeffs::{J1_COEFFS, J1_ZEROS, J1_ZEROS_VALUE};
40use crate::bessel::j1_coeffs_taylor::J1_COEFFS_TAYLOR;
41use crate::common::f_fmla;
42use crate::double_double::DoubleDouble;
43use crate::dyadic_float::{DyadicFloat128, DyadicSign};
44use crate::polyeval::{f_polyeval8, f_polyeval9, f_polyeval12, f_polyeval19};
45use crate::sin_helper::{sin_dd_small, sin_dd_small_fast, sin_f128_small};
46use crate::sincos_reduce::{AngleReduced, rem2pi_any, rem2pi_f128};
47
48/// Bessel of the first kind of order 1
49///
50/// Note about accuracy:
51/// - Close to zero Bessel have tiny values such that testing against MPFR must be done exactly
52///   in the same precision, since any nearest representable number have ULP > 0.5,
53///   for example `J1(0.000000000000000000000000000000000000023509886)` in single precision
54///   have 0.7 ULP for any number with extended precision that would be represented in f32
55///   Same applies to J1(4.4501477170144018E-309) in double precision and some others subnormal numbers
56pub fn f_j1(x: f64) -> f64 {
57    let ux = x.to_bits().wrapping_shl(1);
58
59    if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
60        // |x| <= f64::EPSILON, |x| == inf, x == NaN
61        if ux <= 0x72338c9356bb0314u64 {
62            // |x| <= 0.000000000000000000000000000000001241
63            // J1(x) ~ x/2+O[x]^3
64            return x * 0.5;
65        }
66        if ux <= 0x7960000000000000u64 {
67            // |x| <= f64::EPSILON
68            // J1(x) ~ x/2-x^3/16+O[x]^5
69            let quad_part_x = x * 0.125; // exact. x / 8
70            return f_fmla(quad_part_x, -quad_part_x, 0.5) * x;
71        }
72        if x.is_infinite() {
73            return 0.;
74        }
75        return x + f64::NAN; // x == NaN
76    }
77
78    let ax: u64 = x.to_bits() & 0x7fff_ffff_ffff_ffff;
79
80    if ax < 0x4052a6784230fcf8u64 {
81        // |x| < 74.60109
82        if ax < 0x3feccccccccccccd {
83            // |x| < 0.9
84            return j1_maclaurin_series_fast(x);
85        }
86        return j1_small_argument_fast(x);
87    }
88
89    j1_asympt_fast(x)
90}
91
92/*
93   Evaluates:
94   J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
95   discarding 1*PI/2 using identities gives:
96   J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
97
98   to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
99
100   J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
101*/
102#[inline]
103fn j1_asympt_fast(x: f64) -> f64 {
104    let origin_x = x;
105    static SGN: [f64; 2] = [1., -1.];
106    let sign_scale = SGN[x.is_sign_negative() as usize];
107    let x = x.abs();
108
109    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
110        f64::from_bits(0xbc8cbc0d30ebfd15),
111        f64::from_bits(0x3fe9884533d43651),
112    );
113    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
114        f64::from_bits(0xbc81a62633145c07),
115        f64::from_bits(0xbfe921fb54442d18),
116    );
117
118    let recip = if x.to_bits() > 0x7fd000000000000u64 {
119        DoubleDouble::quick_mult_f64(DoubleDouble::from_exact_safe_div(4.0, x), 0.25)
120    } else {
121        DoubleDouble::from_recip(x)
122    };
123
124    let alpha = bessel_1_asympt_alpha_fast(recip);
125    let beta = bessel_1_asympt_beta_fast(recip);
126
127    let AngleReduced { angle } = rem2pi_any(x);
128
129    // Without full subtraction cancellation happens sometimes
130    let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
131    let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
132
133    let m_sin = sin_dd_small_fast(r0);
134    let z0 = DoubleDouble::quick_mult(beta, m_sin);
135    let r_sqrt = DoubleDouble::from_rsqrt_fast(x);
136    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
137    let p = DoubleDouble::quick_mult(scale, z0);
138
139    let err = f_fmla(
140        p.hi,
141        f64::from_bits(0x3be0000000000000), // 2^-65
142        f64::from_bits(0x3a60000000000000), // 2^-89
143    );
144    let ub = p.hi + (p.lo + err);
145    let lb = p.hi + (p.lo - err);
146
147    if ub == lb {
148        return p.to_f64() * sign_scale;
149    }
150
151    j1_asympt(origin_x, recip, r_sqrt, angle)
152}
153
154/*
155   Evaluates:
156   J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
157   discarding 1*PI/2 using identities gives:
158   J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
159
160   to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
161
162   J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
163*/
164fn j1_asympt(x: f64, recip: DoubleDouble, r_sqrt: DoubleDouble, angle: DoubleDouble) -> f64 {
165    let origin_x = x;
166    static SGN: [f64; 2] = [1., -1.];
167    let sign_scale = SGN[x.is_sign_negative() as usize];
168
169    const SQRT_2_OVER_PI: DoubleDouble = DoubleDouble::new(
170        f64::from_bits(0xbc8cbc0d30ebfd15),
171        f64::from_bits(0x3fe9884533d43651),
172    );
173    const MPI_OVER_4: DoubleDouble = DoubleDouble::new(
174        f64::from_bits(0xbc81a62633145c07),
175        f64::from_bits(0xbfe921fb54442d18),
176    );
177
178    let alpha = bessel_1_asympt_alpha(recip);
179    let beta = bessel_1_asympt_beta(recip);
180
181    // Without full subtraction cancellation happens sometimes
182    let x0pi34 = DoubleDouble::full_dd_sub(MPI_OVER_4, alpha);
183    let r0 = DoubleDouble::full_dd_add(angle, x0pi34);
184
185    let m_sin = sin_dd_small(r0);
186    let z0 = DoubleDouble::quick_mult(beta, m_sin);
187    let scale = DoubleDouble::quick_mult(SQRT_2_OVER_PI, r_sqrt);
188    let r = DoubleDouble::quick_mult(scale, z0);
189
190    let p = DoubleDouble::from_exact_add(r.hi, r.lo);
191
192    let err = f_fmla(
193        p.hi,
194        f64::from_bits(0x3bc0000000000000), // 2^-67
195        f64::from_bits(0x39c0000000000000), // 2^-99
196    );
197
198    let ub = p.hi + (p.lo + err);
199    let lb = p.hi + (p.lo - err);
200
201    if ub == lb {
202        return p.to_f64() * sign_scale;
203    }
204
205    j1_asympt_hard(origin_x)
206}
207
208/**
209Generated in Sollya:
210```text
211pretty = proc(u) {
212  return ~(floor(u*1000)/1000);
213};
214
215bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");
216
217f = bessel_j1(x)/x;
218d = [0, 0.921];
219w = 1;
220pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating);
221
222w = 1;
223or_f = bessel_j1(x);
224pf1 = pf * x;
225err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
226print ("relative error:", pretty(err_p));
227
228for i from 0 to degree(pf) by 2 do {
229    print("'", coeff(pf, i), "',");
230};
231```
232See ./notes/bessel_sollya/bessel_j1_at_zero_fast.sollya
233**/
234#[inline]
235pub(crate) fn j1_maclaurin_series_fast(x: f64) -> f64 {
236    const C0: DoubleDouble = DoubleDouble::from_bit_pair((0x3b30e9e087200000, 0x3fe0000000000000));
237    let x2 = DoubleDouble::from_exact_mult(x, x);
238    let p = f_polyeval12(
239        x2.hi,
240        f64::from_bits(0xbfb0000000000000),
241        f64::from_bits(0x3f65555555555555),
242        f64::from_bits(0xbf0c71c71c71c45e),
243        f64::from_bits(0x3ea6c16c16b82b02),
244        f64::from_bits(0xbe3845c87ec0cbef),
245        f64::from_bits(0x3dc27e0313e8534c),
246        f64::from_bits(0xbd4443dd2d0305d0),
247        f64::from_bits(0xbd0985a435fe9aa1),
248        f64::from_bits(0x3d10c82d92c46d30),
249        f64::from_bits(0xbd0aa3684321f219),
250        f64::from_bits(0x3cf8351f29ac345a),
251        f64::from_bits(0xbcd333fe6cd52c9f),
252    );
253    let mut z = DoubleDouble::mul_f64_add(x2, p, C0);
254    z = DoubleDouble::quick_mult_f64(z, x);
255
256    // squaring error (2^-56) + poly error 2^-75
257    let err = f_fmla(
258        x2.hi,
259        f64::from_bits(0x3c70000000000000), // 2^-56
260        f64::from_bits(0x3b40000000000000), // 2^-75
261    );
262    let ub = z.hi + (z.lo + err);
263    let lb = z.hi + (z.lo - err);
264
265    if ub == lb {
266        return z.to_f64();
267    }
268    j1_maclaurin_series(x)
269}
270
271/**
272Generated in Sollya:
273```text
274pretty = proc(u) {
275  return ~(floor(u*1000)/1000);
276};
277
278bessel_j1 = library("./cmake-build-release/libbessel_sollya.dylib");
279
280f = bessel_j1(x)/x;
281d = [0, 0.921];
282w = 1;
283pf = fpminimax(f, [|0,2,4,6,8,10,12,14,16,18,20,22,24|], [|107, 107, 107, 107, 107, D...|], d, absolute, floating);
284
285w = 1;
286or_f = bessel_j1(x);
287pf1 = pf * x;
288err_p = -log2(dirtyinfnorm(pf1*w-or_f, d));
289print ("relative error:", pretty(err_p));
290
291for i from 0 to degree(pf) by 2 do {
292    print("'", coeff(pf, i), "',");
293};
294```
295See ./notes/bessel_sollya/bessel_j1_at_zero.sollya
296**/
297pub(crate) fn j1_maclaurin_series(x: f64) -> f64 {
298    let origin_x = x;
299    static SGN: [f64; 2] = [1., -1.];
300    let sign_scale = SGN[x.is_sign_negative() as usize];
301    let x = x.abs();
302
303    const CL: [(u64, u64); 5] = [
304        (0xb930000000000000, 0x3fe0000000000000),
305        (0x39c8e80000000000, 0xbfb0000000000000),
306        (0x3c05555554f3add7, 0x3f65555555555555),
307        (0xbbac71c4eb0f8c94, 0xbf0c71c71c71c71c),
308        (0xbb3f56b7a43206d4, 0x3ea6c16c16c16c17),
309    ];
310
311    let dx2 = DoubleDouble::from_exact_mult(x, x);
312
313    let p = f_polyeval8(
314        dx2.hi,
315        f64::from_bits(0xbe3845c8a0ce5129),
316        f64::from_bits(0x3dc27e4fb7789ea2),
317        f64::from_bits(0xbd4522a43f633af1),
318        f64::from_bits(0x3cc2c97589d53f97),
319        f64::from_bits(0xbc3ab8151dca7912),
320        f64::from_bits(0x3baf08732286d1d4),
321        f64::from_bits(0xbb10ac65637413f4),
322        f64::from_bits(0xbae4d8336e4f779c),
323    );
324
325    let mut p_e = DoubleDouble::mul_f64_add(dx2, p, DoubleDouble::from_bit_pair(CL[4]));
326    p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[3]));
327    p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[2]));
328    p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[1]));
329    p_e = DoubleDouble::mul_add(dx2, p_e, DoubleDouble::from_bit_pair(CL[0]));
330
331    let p = DoubleDouble::quick_mult_f64(p_e, x);
332
333    let err = f_fmla(
334        p.hi,
335        f64::from_bits(0x3bd0000000000000), // 2^-66
336        f64::from_bits(0x3a00000000000000), // 2^-95
337    );
338    let ub = p.hi + (p.lo + err);
339    let lb = p.hi + (p.lo - err);
340    if ub != lb {
341        return j1_maclaurin_series_hard(origin_x);
342    }
343
344    p.to_f64() * sign_scale
345}
346
347/**
348Taylor expansion at 0
349
350Generated by SageMath:
351```python
352def print_expansion_at_0():
353    print(f"static C: [DyadicFloat128; 13] = ")
354    from mpmath import mp, j1, taylor, expm1
355    poly = taylor(lambda val: j1(val), 0, 26)
356    real_i = 0
357    print("[")
358    for i in range(1, len(poly), 2):
359        print_dyadic(poly[i])
360        real_i = real_i + 1
361    print("],")
362
363    print("];")
364
365mp.prec = 180
366
367print_expansion_at_0()
368```
369**/
370#[cold]
371#[inline(never)]
372fn j1_maclaurin_series_hard(x: f64) -> f64 {
373    static SGN: [f64; 2] = [1., -1.];
374    let sign_scale = SGN[x.is_sign_negative() as usize];
375    let x = x.abs();
376    static C: [DyadicFloat128; 13] = [
377        DyadicFloat128 {
378            sign: DyadicSign::Pos,
379            exponent: -128,
380            mantissa: 0x80000000_00000000_00000000_00000000_u128,
381        },
382        DyadicFloat128 {
383            sign: DyadicSign::Neg,
384            exponent: -131,
385            mantissa: 0x80000000_00000000_00000000_00000000_u128,
386        },
387        DyadicFloat128 {
388            sign: DyadicSign::Pos,
389            exponent: -136,
390            mantissa: 0xaaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaab_u128,
391        },
392        DyadicFloat128 {
393            sign: DyadicSign::Neg,
394            exponent: -142,
395            mantissa: 0xe38e38e3_8e38e38e_38e38e38_e38e38e4_u128,
396        },
397        DyadicFloat128 {
398            sign: DyadicSign::Pos,
399            exponent: -148,
400            mantissa: 0xb60b60b6_0b60b60b_60b60b60_b60b60b6_u128,
401        },
402        DyadicFloat128 {
403            sign: DyadicSign::Neg,
404            exponent: -155,
405            mantissa: 0xc22e4506_72894ab6_cd8efb11_d33f5618_u128,
406        },
407        DyadicFloat128 {
408            sign: DyadicSign::Pos,
409            exponent: -162,
410            mantissa: 0x93f27dbb_c4fae397_780b69f5_333c725b_u128,
411        },
412        DyadicFloat128 {
413            sign: DyadicSign::Neg,
414            exponent: -170,
415            mantissa: 0xa91521fb_2a434d3f_649f5485_f169a743_u128,
416        },
417        DyadicFloat128 {
418            sign: DyadicSign::Pos,
419            exponent: -178,
420            mantissa: 0x964bac6d_7ae67d8d_aec68405_485dea03_u128,
421        },
422        DyadicFloat128 {
423            sign: DyadicSign::Neg,
424            exponent: -187,
425            mantissa: 0xd5c0f53a_fe6fa17f_8c7b0b68_39691f4e_u128,
426        },
427        DyadicFloat128 {
428            sign: DyadicSign::Pos,
429            exponent: -196,
430            mantissa: 0xf8bb4be7_8e7896b0_58fee362_01a4370c_u128,
431        },
432        DyadicFloat128 {
433            sign: DyadicSign::Neg,
434            exponent: -205,
435            mantissa: 0xf131bdf7_cff8d02e_e1ef6820_f9d58ab6_u128,
436        },
437        DyadicFloat128 {
438            sign: DyadicSign::Pos,
439            exponent: -214,
440            mantissa: 0xc5e72c48_0d1aec75_3caa2e0d_edd008ca_u128,
441        },
442    ];
443
444    let rx = DyadicFloat128::new_from_f64(x);
445    let dx = rx * rx;
446
447    let mut p = C[12];
448    for i in (0..12).rev() {
449        p = dx * p + C[i];
450    }
451
452    (p * rx).fast_as_f64() * sign_scale
453}
454
455/// This method on small range searches for nearest zero or extremum.
456/// Then picks stored series expansion at the point end evaluates the poly at the point.
457#[inline]
458pub(crate) fn j1_small_argument_fast(x: f64) -> f64 {
459    static SIGN: [f64; 2] = [1., -1.];
460    let sign_scale = SIGN[x.is_sign_negative() as usize];
461    let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
462
463    // let avg_step = 74.60109 / 47.0;
464    // let inv_step = 1.0 / avg_step;
465
466    const INV_STEP: f64 = 0.6300176043004198;
467
468    let fx = x_abs * INV_STEP;
469    const J1_ZEROS_COUNT: f64 = (J1_ZEROS.len() - 1) as f64;
470    let idx0 = unsafe { fx.min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
471    let idx1 = unsafe { fx.ceil().min(J1_ZEROS_COUNT).to_int_unchecked::<usize>() };
472
473    let found_zero0 = DoubleDouble::from_bit_pair(J1_ZEROS[idx0]);
474    let found_zero1 = DoubleDouble::from_bit_pair(J1_ZEROS[idx1]);
475
476    let dist0 = (found_zero0.hi - x_abs).abs();
477    let dist1 = (found_zero1.hi - x_abs).abs();
478
479    let (found_zero, idx, dist) = if dist0 < dist1 {
480        (found_zero0, idx0, dist0)
481    } else {
482        (found_zero1, idx1, dist1)
483    };
484
485    if idx == 0 {
486        return j1_maclaurin_series_fast(x);
487    }
488
489    let r = DoubleDouble::full_add_f64(-found_zero, x_abs);
490
491    // We hit exact zero, value, better to return it directly
492    if dist == 0. {
493        return f64::from_bits(J1_ZEROS_VALUE[idx]) * sign_scale;
494    }
495
496    let is_zero_too_close = dist.abs() < 1e-3;
497
498    let c = if is_zero_too_close {
499        &J1_COEFFS_TAYLOR[idx - 1]
500    } else {
501        &J1_COEFFS[idx - 1]
502    };
503
504    let p = f_polyeval19(
505        r.hi,
506        f64::from_bits(c[5].1),
507        f64::from_bits(c[6].1),
508        f64::from_bits(c[7].1),
509        f64::from_bits(c[8].1),
510        f64::from_bits(c[9].1),
511        f64::from_bits(c[10].1),
512        f64::from_bits(c[11].1),
513        f64::from_bits(c[12].1),
514        f64::from_bits(c[13].1),
515        f64::from_bits(c[14].1),
516        f64::from_bits(c[15].1),
517        f64::from_bits(c[16].1),
518        f64::from_bits(c[17].1),
519        f64::from_bits(c[18].1),
520        f64::from_bits(c[19].1),
521        f64::from_bits(c[20].1),
522        f64::from_bits(c[21].1),
523        f64::from_bits(c[22].1),
524        f64::from_bits(c[23].1),
525    );
526
527    let mut z = DoubleDouble::mul_f64_add(r, p, DoubleDouble::from_bit_pair(c[4]));
528    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[3]));
529    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[2]));
530    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[1]));
531    z = DoubleDouble::mul_add(z, r, DoubleDouble::from_bit_pair(c[0]));
532    let err = f_fmla(
533        z.hi,
534        f64::from_bits(0x3c70000000000000), // 2^-56
535        f64::from_bits(0x3bf0000000000000), // 2^-64
536    );
537    let ub = z.hi + (z.lo + err);
538    let lb = z.hi + (z.lo - err);
539
540    if ub == lb {
541        return z.to_f64() * sign_scale;
542    }
543
544    j1_small_argument_dd(sign_scale, r, c)
545}
546
547fn j1_small_argument_dd(sign_scale: f64, r: DoubleDouble, c0: &[(u64, u64); 24]) -> f64 {
548    let c = &c0[15..];
549
550    let p0 = f_polyeval9(
551        r.to_f64(),
552        f64::from_bits(c[0].1),
553        f64::from_bits(c[1].1),
554        f64::from_bits(c[2].1),
555        f64::from_bits(c[3].1),
556        f64::from_bits(c[4].1),
557        f64::from_bits(c[5].1),
558        f64::from_bits(c[6].1),
559        f64::from_bits(c[7].1),
560        f64::from_bits(c[8].1),
561    );
562
563    let c = c0;
564
565    let mut p_e = DoubleDouble::mul_f64_add(r, p0, DoubleDouble::from_bit_pair(c[14]));
566    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[13]));
567    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[12]));
568    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[11]));
569    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[10]));
570    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[9]));
571    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[8]));
572    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[7]));
573    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[6]));
574    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[5]));
575    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[4]));
576    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[3]));
577    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[2]));
578    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[1]));
579    p_e = DoubleDouble::mul_add(p_e, r, DoubleDouble::from_bit_pair(c[0]));
580
581    let p = DoubleDouble::from_exact_add(p_e.hi, p_e.lo);
582    let err = f_fmla(
583        p.hi,
584        f64::from_bits(0x3c10000000000000), // 2^-62
585        f64::from_bits(0x3a00000000000000), // 2^-95
586    );
587    let ub = p.hi + (p.lo + err);
588    let lb = p.hi + (p.lo - err);
589    if ub != lb {
590        return j1_small_argument_path_hard(sign_scale, r, c);
591    }
592    p.to_f64() * sign_scale
593}
594
595#[cold]
596#[inline(never)]
597fn j1_small_argument_path_hard(sign_scale: f64, r: DoubleDouble, c: &[(u64, u64); 24]) -> f64 {
598    let mut p = DoubleDouble::from_bit_pair(c[23]);
599    for i in (0..23).rev() {
600        p = DoubleDouble::mul_add(r, p, DoubleDouble::from_bit_pair(c[i]));
601        p = DoubleDouble::from_exact_add(p.hi, p.lo);
602    }
603    p.to_f64() * sign_scale
604}
605
606/*
607   Evaluates:
608   J1 = sqrt(2/(PI*x)) * beta(x) * cos(x - 3*PI/4 - alpha(x))
609   discarding 1*PI/2 using identities gives:
610   J1 = sqrt(2/(PI*x)) * beta(x) * sin(x - PI/4 - alpha(x))
611
612   to avoid squashing small (-PI/4 - alpha(x)) into a large x actual expansion is:
613
614   J1 = sqrt(2/(PI*x)) * beta(x) * sin((x mod 2*PI) - PI/4 - alpha(x))
615
616   This method is required for situations where x*x or 1/(x*x) will overflow
617*/
618#[cold]
619#[inline(never)]
620fn j1_asympt_hard(x: f64) -> f64 {
621    static SGN: [f64; 2] = [1., -1.];
622    let sign_scale = SGN[x.is_sign_negative() as usize];
623    let x = x.abs();
624
625    const SQRT_2_OVER_PI: DyadicFloat128 = DyadicFloat128 {
626        sign: DyadicSign::Pos,
627        exponent: -128,
628        mantissa: 0xcc42299e_a1b28468_7e59e280_5d5c7180_u128,
629    };
630
631    const MPI_OVER_4: DyadicFloat128 = DyadicFloat128 {
632        sign: DyadicSign::Neg,
633        exponent: -128,
634        mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
635    };
636
637    let x_dyadic = DyadicFloat128::new_from_f64(x);
638    let recip = DyadicFloat128::accurate_reciprocal(x);
639
640    let alpha = bessel_1_asympt_alpha_hard(recip);
641    let beta = bessel_1_asympt_beta_hard(recip);
642
643    let angle = rem2pi_f128(x_dyadic);
644
645    let x0pi34 = MPI_OVER_4 - alpha;
646    let r0 = angle + x0pi34;
647
648    let m_sin = sin_f128_small(r0);
649
650    let z0 = beta * m_sin;
651    let r_sqrt = bessel_rsqrt_hard(x, recip);
652    let scale = SQRT_2_OVER_PI * r_sqrt;
653    let p = scale * z0;
654    p.fast_as_f64() * sign_scale
655}
656
657#[cfg(test)]
658mod tests {
659    use super::*;
660
661    #[test]
662    fn test_j1() {
663        assert_eq!(f_j1(0.000000000000000000000000000000001241), 6.205e-34);
664        assert_eq!(f_j1(0.0000000000000000000000000000004321), 2.1605e-31);
665        assert_eq!(f_j1(0.00000000000000000004321), 2.1605e-20);
666        assert_eq!(f_j1(73.81695991658546), -0.06531447184607607);
667        assert_eq!(f_j1(0.01), 0.004999937500260416);
668        assert_eq!(f_j1(0.9), 0.4059495460788057);
669        assert_eq!(
670            f_j1(162605674999778540000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
671            0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008686943178258183
672        );
673        assert_eq!(f_j1(3.831705970207517), -1.8501090915423025e-15);
674        assert_eq!(f_j1(-3.831705970207517), 1.8501090915423025e-15);
675        assert_eq!(f_j1(-6.1795701510782757E+307), 8.130935041593236e-155);
676        assert_eq!(
677            f_j1(0.000000000000000000000000000000000000008827127),
678            0.0000000000000000000000000000000000000044135635
679        );
680        assert_eq!(
681            f_j1(-0.000000000000000000000000000000000000008827127),
682            -0.0000000000000000000000000000000000000044135635
683        );
684        assert_eq!(f_j1(5.4), -0.3453447907795863);
685        assert_eq!(
686            f_j1(77.743162408196766932633181568235159),
687            0.09049267898021947
688        );
689        assert_eq!(
690            f_j1(84.027189586293545175976760219782591),
691            0.0870430264022591
692        );
693        assert_eq!(f_j1(f64::NEG_INFINITY), 0.0);
694        assert_eq!(f_j1(f64::INFINITY), 0.0);
695        assert!(f_j1(f64::NAN).is_nan());
696    }
697}