1use crate::asin_eval_dyadic::asin_eval_dyadic;
30use crate::common::f_fmla;
31use crate::double_double::DoubleDouble;
32use crate::dyadic_float::{DyadicFloat128, DyadicSign};
33use crate::round::RoundFinite;
34
35static ASIN_COEFFS: [[u64; 12]; 9] = [
36 [
37 0x3ff0000000000000,
38 0x0000000000000000,
39 0x3fc5555555555555,
40 0x3c65555555555555,
41 0x3fb3333333333333,
42 0x3fa6db6db6db6db7,
43 0x3f9f1c71c71c71c7,
44 0x3f96e8ba2e8ba2e9,
45 0x3f91c4ec4ec4ec4f,
46 0x3f8c99999999999a,
47 0x3f87a87878787878,
48 0x3f83fde50d79435e,
49 ],
50 [
51 0x3ff015a397cf0f1c,
52 0xbc8eebd6ccfe3ee3,
53 0x3fc5f3581be7b08b,
54 0xbc65df80d0e7237d,
55 0x3fb4519ddf1ae530,
56 0x3fa8eb4b6eeb1696,
57 0x3fa17bc85420fec8,
58 0x3f9a8e39b5dcad81,
59 0x3f953f8df127539b,
60 0x3f91a485a0b0130a,
61 0x3f8e20e6e4930020,
62 0x3f8a466a7030f4c9,
63 ],
64 [
65 0x3ff02be9ce0b87cd,
66 0x3c7e5d09da2e0f04,
67 0x3fc69ab5325bc359,
68 0xbc692f480cfede2d,
69 0x3fb58a4c3097aab1,
70 0x3fab3db36068dd80,
71 0x3fa3b94821846250,
72 0x3f9eedc823765d21,
73 0x3f998e35d756be6b,
74 0x3f95ea4f1b32731a,
75 0x3f9355115764148e,
76 0x3f916a5853847c91,
77 ],
78 [
79 0x3ff042dc6a65ffbf,
80 0xbc8c7ea28dce95d1,
81 0x3fc74c4bd7412f9d,
82 0x3c5447024c0a3c87,
83 0x3fb6e09c6d2b72b9,
84 0x3faddd9dcdae5315,
85 0x3fa656f1f64058b8,
86 0x3fa21a42e4437101,
87 0x3f9eed0350b7edb2,
88 0x3f9b6bc877e58c52,
89 0x3f9903a0872eb2a4,
90 0x3f974da839ddd6d8,
91 ],
92 [
93 0x3ff05a8621feb16b,
94 0xbc7e5b33b1407c5f,
95 0x3fc809186c2e57dd,
96 0xbc33dcb4d6069407,
97 0x3fb8587d99442dc5,
98 0x3fb06c23d1e75be3,
99 0x3fa969024051c67d,
100 0x3fa54e4f934aacfd,
101 0x3fa2d60a732dbc9c,
102 0x3fa149f0c046eac7,
103 0x3fa053a56dba1fba,
104 0x3f9f7face3343992,
105 ],
106 [
107 0x3ff072f2b6f1e601,
108 0xbc92dcbb05419970,
109 0x3fc8d2397127aeba,
110 0x3c6ead0c497955fb,
111 0x3fb9f68df88da518,
112 0x3fb21ee26a5900d7,
113 0x3fad08e7081b53a9,
114 0x3fa938dd661713f7,
115 0x3fa71b9f299b72e6,
116 0x3fa5fbc7d2450527,
117 0x3fa58573247ec325,
118 0x3fa585a174a6a4ce,
119 ],
120 [
121 0x3ff08c2f1d638e4c,
122 0x3c7b47c159534a3d,
123 0x3fc9a8f592078624,
124 0xbc6ea339145b65cd,
125 0x3fbbc04165b57aab,
126 0x3fb410df5f58441d,
127 0x3fb0ab6bdf5f8f70,
128 0x3fae0b92eea1fce1,
129 0x3fac9094e443a971,
130 0x3fac34651d64bc74,
131 0x3facaa008d1af080,
132 0x3fadc165bc0c4fc5,
133 ],
134 [
135 0x3ff0a649a73e61f2,
136 0x3c874ac0d817e9c7,
137 0x3fca8ec30dc93890,
138 0xbc48ab1c0eef300c,
139 0x3fbdbc11ea95061b,
140 0x3fb64e371d661328,
141 0x3fb33e0023b3d895,
142 0x3fb2042269c243ce,
143 0x3fb1cce74bda2230,
144 0x3fb244d425572ce9,
145 0x3fb34d475c7f1e3e,
146 0x3fb4d4e653082ad3,
147 ],
148 [
149 0x3ff0c152382d7366,
150 0xbc9ee6913347c2a6,
151 0x3fcb8550d62bfb6d,
152 0xbc6d10aec3f116d5,
153 0x3fbff1bde0fa3ca0,
154 0x3fb8e5f3ab69f6a4,
155 0x3fb656be8b6527ce,
156 0x3fb5c39755dc041a,
157 0x3fb661e6ebd40599,
158 0x3fb7ea3dddee2a4f,
159 0x3fba4f439abb4869,
160 0x3fbd9181c0fda658,
161 ],
162];
163
164#[inline]
165pub(crate) fn asin_eval(u: DoubleDouble, err: f64) -> (DoubleDouble, f64) {
166 let k = (u.hi * f64::from_bits(0x4040000000000000)).round_finite();
168 let idx = k as u64;
169 let y_hi = f_fmla(k, f64::from_bits(0xbfa0000000000000), u.hi); let y = DoubleDouble::from_exact_add(y_hi, u.lo);
172 let y2 = y.hi * y.hi;
173 let err = f_fmla(err, y2, f64::from_bits(0x3990000000000000));
175 let coeffs = ASIN_COEFFS[idx as usize];
176 let c0 = DoubleDouble::quick_mult(
177 y,
178 DoubleDouble::new(f64::from_bits(coeffs[3]), f64::from_bits(coeffs[2])),
179 );
180 let c1 = f_fmla(y.hi, f64::from_bits(coeffs[5]), f64::from_bits(coeffs[4]));
181 let c2 = f_fmla(y.hi, f64::from_bits(coeffs[7]), f64::from_bits(coeffs[6]));
182 let c3 = f_fmla(y.hi, f64::from_bits(coeffs[9]), f64::from_bits(coeffs[8]));
183 let c4 = f_fmla(y.hi, f64::from_bits(coeffs[11]), f64::from_bits(coeffs[10]));
184
185 let y4 = y2 * y2;
186 let d0 = f_fmla(y2, c2, c1);
187 let d1 = f_fmla(y2, c4, c3);
188
189 let mut r = DoubleDouble::from_exact_add(f64::from_bits(coeffs[0]), c0.hi);
190
191 let e1 = f_fmla(y4, d1, d0);
192
193 r.lo = f_fmla(y2, e1, f64::from_bits(coeffs[1]) + c0.lo + r.lo);
194
195 (r, err)
196}
197
198pub fn f_asin(x: f64) -> f64 {
202 let x_e = (x.to_bits() >> 52) & 0x7ff;
203 const E_BIAS: u64 = (1u64 << (11 - 1u64)) - 1u64;
204
205 let x_abs = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
206
207 if x_e < E_BIAS - 1 {
209 if x_e < E_BIAS - 26 {
211 if x.abs() == 0. {
219 return x;
220 }
221 let eps = f64::copysign(f64::MIN_POSITIVE, x);
223 let normalize_const = if x_e == 0 { eps } else { 0.0 };
224 let scaled_normal =
225 f_fmla(x + normalize_const, f64::from_bits(0x4350000000000000), eps);
226 return f_fmla(
227 scaled_normal,
228 f64::from_bits(0x3c90000000000000),
229 -normalize_const,
230 );
231 }
232
233 let x_sq = DoubleDouble::from_exact_mult(x, x);
234 let err = x_abs * f64::from_bits(0x3cc0000000000000);
235 let (p, err) = asin_eval(x_sq, err);
239 let r0 = DoubleDouble::from_exact_mult(x, p.hi);
241 let r_lo = f_fmla(x, p.lo, r0.lo);
242
243 let r_upper = r0.hi + (r_lo + err);
244 let r_lower = r0.hi + (r_lo - err);
245
246 if r_upper == r_lower {
247 return r_upper;
248 }
249
250 let idx = (x_sq.hi * f64::from_bits(0x4050000000000000)).round_finite() as usize;
254
255 let x_f128 = DyadicFloat128::new_from_f64(x);
259
260 let u: DyadicFloat128;
261 #[cfg(any(
262 all(
263 any(target_arch = "x86", target_arch = "x86_64"),
264 target_feature = "fma"
265 ),
266 all(target_arch = "aarch64", target_feature = "neon")
267 ))]
268 {
269 let u_hi = DyadicFloat128::new_from_f64(f_fmla(
271 idx as f64,
272 f64::from_bits(0xbf90000000000000),
273 x_sq.hi,
274 ));
275 u = u_hi.quick_add(&DyadicFloat128::new_from_f64(x_sq.lo));
276 }
277
278 #[cfg(not(any(
279 all(
280 any(target_arch = "x86", target_arch = "x86_64"),
281 target_feature = "fma"
282 ),
283 all(target_arch = "aarch64", target_feature = "neon")
284 )))]
285 {
286 let x_sq_f128 = x_f128.quick_mul(&x_f128);
287 u = x_sq_f128.quick_add(&DyadicFloat128::new_from_f64(
288 idx as f64 * (f64::from_bits(0xbf90000000000000)),
289 ));
290 }
291
292 let p_f128 = asin_eval_dyadic(u, idx);
293 let r = x_f128.quick_mul(&p_f128);
294 return r.fast_as_f64();
295 }
296
297 const PI_OVER_TWO: DoubleDouble = DoubleDouble::new(
298 f64::from_bits(0x3c91a62633145c07),
299 f64::from_bits(0x3ff921fb54442d18),
300 );
301
302 let x_sign = if x.is_sign_negative() { -1.0 } else { 1.0 };
303
304 if x_e >= E_BIAS {
306 if x_abs == 1.0 {
308 return f_fmla(x_sign, PI_OVER_TWO.hi, x_sign * PI_OVER_TWO.lo);
310 }
311 if x.is_nan() {
313 return x;
314 }
315 return f64::NAN;
316 }
317
318 let u = f_fmla(x_abs, -0.5, 0.5);
320 let v_hi = u.sqrt();
331 let h;
332 #[cfg(any(
333 all(
334 any(target_arch = "x86", target_arch = "x86_64"),
335 target_feature = "fma"
336 ),
337 all(target_arch = "aarch64", target_feature = "neon")
338 ))]
339 {
340 h = f_fmla(v_hi, -v_hi, u);
341 }
342 #[cfg(not(any(
343 all(
344 any(target_arch = "x86", target_arch = "x86_64"),
345 target_feature = "fma"
346 ),
347 all(target_arch = "aarch64", target_feature = "neon")
348 )))]
349 {
350 let v_hi_sq = DoubleDouble::from_exact_mult(v_hi, v_hi);
351 h = (u - v_hi_sq.hi) - v_hi_sq.lo;
352 }
353 let vh = v_hi * 2.0;
357 let vl = h / v_hi;
358
359 let err = vh * f64::from_bits(0x3cc0000000000000);
362
363 let (p, err) = asin_eval(DoubleDouble::new(0.0, u), err);
364
365 let r0 = DoubleDouble::quick_mult(DoubleDouble::new(vl, vh), p);
368 let r = DoubleDouble::from_exact_add(PI_OVER_TWO.hi, -r0.hi);
369
370 let r_lo = PI_OVER_TWO.lo - r0.lo + r.lo;
371
372 let (r_upper, r_lower);
373
374 #[cfg(any(
375 all(
376 any(target_arch = "x86", target_arch = "x86_64"),
377 target_feature = "fma"
378 ),
379 all(target_arch = "aarch64", target_feature = "neon")
380 ))]
381 {
382 r_upper = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, err));
383 r_lower = f_fmla(r.hi, x_sign, f_fmla(r_lo, x_sign, -err));
384 }
385 #[cfg(not(any(
386 all(
387 any(target_arch = "x86", target_arch = "x86_64"),
388 target_feature = "fma"
389 ),
390 all(target_arch = "aarch64", target_feature = "neon")
391 )))]
392 {
393 let r_lo = r_lo * x_sign;
394 let r_hi = r.hi * x_sign;
395 r_upper = r_hi + (r_lo + err);
396 r_lower = r.hi + (r_lo - err);
397 }
398
399 if r_upper == r_lower {
400 return r_upper;
401 }
402
403 let idx = (u * f64::from_bits(0x4050000000000000)).round_finite() as usize;
406
407 let vl_lo;
427 #[cfg(any(
428 all(
429 any(target_arch = "x86", target_arch = "x86_64"),
430 target_feature = "fma"
431 ),
432 all(target_arch = "aarch64", target_feature = "neon")
433 ))]
434 {
435 vl_lo = f_fmla(-v_hi, vl, h) / v_hi;
436 }
437 #[cfg(not(any(
438 all(
439 any(target_arch = "x86", target_arch = "x86_64"),
440 target_feature = "fma"
441 ),
442 all(target_arch = "aarch64", target_feature = "neon")
443 )))]
444 {
445 let vh_vl = DoubleDouble::from_exact_mult(v_hi, vl);
446 vl_lo = ((h - vh_vl.hi) - vh_vl.lo) / v_hi;
447 }
448
449 let t = h * (-0.25) / u;
451 let vll = f_fmla(vl, t, vl_lo);
452 let mv0 = DyadicFloat128::new_from_f64(vl) + DyadicFloat128::new_from_f64(vll);
454 let mut m_v = DyadicFloat128::new_from_f64(vh) + mv0;
455 m_v.sign = DyadicSign::Neg;
456
457 let y_f128 =
460 DyadicFloat128::new_from_f64(f_fmla(idx as f64, f64::from_bits(0xbf90000000000000), u));
461
462 const PI_OVER_TWO_F128: DyadicFloat128 = DyadicFloat128 {
463 sign: DyadicSign::Pos,
464 exponent: -127,
465 mantissa: 0xc90fdaa2_2168c234_c4c6628b_80dc1cd1_u128,
466 };
467
468 let p_f128 = asin_eval_dyadic(y_f128, idx);
469 let r0_f128 = m_v.quick_mul(&p_f128);
470 let mut r_f128 = PI_OVER_TWO_F128.quick_add(&r0_f128);
471
472 if x.is_sign_negative() {
473 r_f128.sign = DyadicSign::Neg;
474 }
475
476 r_f128.fast_as_f64()
477}
478
479#[cfg(test)]
480mod tests {
481 use super::*;
482
483 #[test]
484 fn f_asin_test() {
485 assert_eq!(f_asin(-0.4), -0.41151684606748806);
486 assert_eq!(f_asin(-0.8), -0.9272952180016123);
487 assert_eq!(f_asin(0.3), 0.3046926540153975);
488 assert_eq!(f_asin(0.6), 0.6435011087932844);
489 }
490}