1use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::shared_eval::poly_dd_3;
32
33pub(crate) static ATAN_CIRCLE: [[u16; 3]; 31] = [
34 [419, 81, 0],
35 [500, 81, 0],
36 [582, 163, 0],
37 [745, 163, 0],
38 [908, 326, 0],
39 [1234, 326, 0],
40 [1559, 651, 0],
41 [2210, 650, 1],
42 [2860, 1299, 3],
43 [4156, 1293, 4],
44 [5444, 2569, 24],
45 [7989, 2520, 32],
46 [10476, 4917, 168],
47 [15224, 4576, 200],
48 [19601, 8341, 838],
49 [27105, 6648, 731],
50 [33036, 10210, 1998],
51 [41266, 6292, 1117],
52 [46469, 7926, 2048],
53 [52375, 4038, 849],
54 [55587, 4591, 1291],
55 [58906, 2172, 479],
56 [60612, 2390, 688],
57 [62325, 1107, 247],
58 [63192, 1207, 349],
59 [64056, 556, 124],
60 [64491, 605, 175],
61 [64923, 278, 62],
62 [65141, 303, 88],
63 [65358, 139, 31],
64 [65467, 151, 44],
65];
66
67pub(crate) static ATAN_REDUCE: [(u64, u64); 129] = [
68 (0x0000000000000000, 0x0000000000000000),
69 (0x3f89224e047e368e, 0x3c1a3ca6c727c59d),
70 (0x3f992346247a91f0, 0x3bf138b0ef96a186),
71 (0x3fa2dbaae9a05db0, 0x3c436e7f8a3f5e42),
72 (0x3fa927278a3b1162, 0xbbfac986efb92662),
73 (0x3faf7495ea3f3783, 0x3c406ec8011ee816),
74 (0x3fb2e239ccff3831, 0xbc5858437d431332),
75 (0x3fb60b9f7597fdec, 0xbc3cebd13eb7c513),
76 (0x3fb936bb8c5b2da2, 0xbc5840cac0d81db5),
77 (0x3fbc63ce377fc802, 0x3c5400b0fdaa109e),
78 (0x3fbf93183a8db9e9, 0x3c40e04e06c86e72),
79 (0x3fc1626d85a91e70, 0x3c4f7ad829163ca7),
80 (0x3fc2fcac73a60640, 0xbc52680735ce2cd8),
81 (0x3fc4986a74cf4e57, 0xbc690559690b42e4),
82 (0x3fc635c990ce0d36, 0x3c591d29110b41aa),
83 (0x3fc7d4ec54fb5968, 0xbc4ea90e27182780),
84 (0x3fc975f5e0553158, 0xbc2dc82ac14e3e1c),
85 (0x3fcb1909efd8b762, 0xbc573a10fd13daaf),
86 (0x3fccbe4ceb4b4cf2, 0xbc63a7ffbeabda0b),
87 (0x3fce65e3f27c9f2a, 0xbc6db6627a24d523),
88 (0x3fd007fa758626ae, 0xbc645f97dd3099f6),
89 (0x3fd0de53475f3b3c, 0xbc66293f68741816),
90 (0x3fd1b6103d3597e9, 0xbc6ab240d40633e9),
91 (0x3fd28f459ecad74d, 0xbc2de34d14e832e0),
92 (0x3fd36a08355c63dc, 0x3c6af540d9fb4926),
93 (0x3fd4466d542bac92, 0x3c6da60fdbc82ac4),
94 (0x3fd5248ae1701b17, 0xbc792a601170138a),
95 (0x3fd604775fbb27df, 0xbc67f1fca1d5d15b),
96 (0x3fd6e649f7d78649, 0xbc64e223ea716c7b),
97 (0x3fd7ca1a832d0f84, 0x3c7b24c824ac51fc),
98 (0x3fd8b00196b3d022, 0x3c64314cd132ba43),
99 (0x3fd998188e816bf0, 0xbc711f1e0817879a),
100 (0x3fda827999fcef32, 0xbc6c3dea4dbad538),
101 (0x3fdb6f3fc8c61e5b, 0x3c660d1b780ee3eb),
102 (0x3fdc5e87185e67b6, 0xbc4ab5edb7dfa545),
103 (0x3fdd506c82a2c800, 0xbc68e1437048b5bd),
104 (0x3fde450e0d273e7a, 0xbc706951c97b050f),
105 (0x3fdf3c8ad985d9ee, 0xbc414af9522ab518),
106 (0x3fe01b819b5a7cf7, 0xbc7aba0d7d97d1f2),
107 (0x3fe09a4c59bd0d4d, 0x3c4095bc4ebc2c42),
108 (0x3fe11ab7190834ec, 0x3c8798826fa27774),
109 (0x3fe19cd3fe8e405d, 0x3c8008f6258fc98f),
110 (0x3fe220b5ef047825, 0xbc5462af7ceb7de6),
111 (0x3fe2a6709a74f289, 0xbc71184dfd78b472),
112 (0x3fe32e1889047ffd, 0x3c79141876dc40c5),
113 (0x3fe3b7c3289ed6f3, 0x3c8481c20189726c),
114 (0x3fe44386db9ce5db, 0x3c82e851bd025441),
115 (0x3fe4d17b087b265d, 0x3c713ada9b8bc419),
116 (0x3fe561b82ab7f990, 0xbc805b4c3c4cbee8),
117 (0x3fe5f457e4f4812e, 0xbc85619249bd96f1),
118 (0x3fe6897514751db6, 0xbc6b0a0fbcafc671),
119 (0x3fe7212be621be6d, 0xbc819ff2dc66da45),
120 (0x3fe7bb99ed2990cf, 0x3c81320449592d92),
121 (0x3fe858de3b716571, 0xbc81fddcd2f3da8e),
122 (0x3fe8f9197bf85eeb, 0x3c6d44a42e35cc97),
123 (0x3fe99c6e0f634394, 0xbc7585a178b4a18d),
124 (0x3fea43002ae42850, 0x3c6f95a531b3a970),
125 (0x3feaecf5f9ba35a6, 0xbc396c2d43ca3392),
126 (0x3feb9a77c18c1af2, 0xbc6a5bed94b05def),
127 (0x3fec4bb009e77983, 0x3c454509d2bff511),
128 (0x3fed00cbc7384d2e, 0xbc6b4c867cef300c),
129 (0x3fedb9fa89953fcf, 0xbc1ddfac663d6bc6),
130 (0x3fee776eafc91706, 0xbc7a510683ff7cb6),
131 (0x3fef395d9f0e3c92, 0x3c44fdcd8e4e8710),
132 (0x3ff0000000000000, 0x0000000000000000),
133 (0x3ff065c900aaf2d8, 0xbc8deec7fc9042ad),
134 (0x3ff0ce29d0883c99, 0xbc8395ae45e0657d),
135 (0x3ff139447e6a86ee, 0x3c8332cf301a97f3),
136 (0x3ff1a73d55278c4b, 0xbc86cc8c4b78213b),
137 (0x3ff2183b0c4573ff, 0x3c870a90841da57a),
138 (0x3ff28c66fdaf8f09, 0xbc6ba39bad450ee0),
139 (0x3ff303ed61109e20, 0xbc88692946d9f93c),
140 (0x3ff37efd8d87607e, 0x3c63b711bf765b58),
141 (0x3ff3fdca42847507, 0x3c7c21387985b081),
142 (0x3ff48089f8bf42cc, 0xbc87ddb19d3d0efc),
143 (0x3ff507773c537ead, 0xbc7f5e354cf971f3),
144 (0x3ff592d11142fa55, 0xbc700f0ad675330d),
145 (0x3ff622db63c8ecc2, 0xbc82c93f50ab2c0e),
146 (0x3ff6b7df86265200, 0x3c7bec391adc37d5),
147 (0x3ff7522cbdd428a8, 0xbc69686ddc9ffcf5),
148 (0x3ff7f218e25a7461, 0xbc78d16529514246),
149 (0x3ff89801106cc709, 0xbc8092f51e9c2803),
150 (0x3ff9444a7462122a, 0xbc807c06755404c4),
151 (0x3ff9f7632fa9e871, 0x3c802e0d43abc92b),
152 (0x3ffab1c35d8a74ea, 0x3c5d0184e48af6f7),
153 (0x3ffb73ee3c3ef16a, 0x3c773be957380bc2),
154 (0x3ffc3e738086bc0f, 0xbc702b6e26c84462),
155 (0x3ffd11f0dae40609, 0x3c525c4f3ffa6e1f),
156 (0x3ffdef13b73c1406, 0xbc5e302db3c6823f),
157 (0x3ffed69b4153a45d, 0x3c73207830326c0e),
158 (0x3fffc95abad6cf4a, 0xbc66308cee7927bf),
159 (0x4000641e192ceab3, 0xbc70147ebf0df4c5),
160 (0x4000ea21d716fbf7, 0xbc7168533cc41d8b),
161 (0x40017749711a6679, 0xbc652a0b0333e9c5),
162 (0x40020c36c6a7f38e, 0x3c68659eece35395),
163 (0x4002a99f50fd4f4f, 0x3c820fcad18cb36f),
164 (0x4003504f333f9de6, 0xbc752afdbd5a8c74),
165 (0x4004012ce2586a17, 0xbc79747a792907d7),
166 (0x4004bd3d87fe0650, 0x3c790c59393b52c8),
167 (0x400585aa4e1530fa, 0x3c7af6934f13a3a8),
168 (0x40065bc6cc825147, 0xbc48534dcab5ad3e),
169 (0x40074118e4b6a7c8, 0xbc7555aa8bfca9a1),
170 (0x400837626d70fdb8, 0xbc556b3fee9ca72b),
171 (0x400940ad30abc792, 0x3c54b3fdd4fdc06c),
172 (0x400a5f59e90600dd, 0x3c6285d367c55ddc),
173 (0x400b9633283b6d14, 0xbc48712976f17a16),
174 (0x400ce885653127e7, 0xbc3abe8ab65d49fc),
175 (0x400e5a3de972a377, 0x3c5cd9be81ad764b),
176 (0x400ff01305ecd8dc, 0x3c4742c2922656fa),
177 (0x4010d7dc7cff4c9e, 0xbc77c842978bee09),
178 (0x4011d0143e71565f, 0x3c67bc7dea7c3c03),
179 (0x4012e4ff1626b949, 0x3c4aefbe25b404e9),
180 (0x40141bfee2424771, 0xbc34bcfaaa95cb2c),
181 (0x40157be4eaa5e11b, 0x3c50fe741e4ec679),
182 (0x40170d751908c1b1, 0x3c5fe74a5b0ec709),
183 (0x4018dc25c117782b, 0x3c50ca1c19f710ef),
184 (0x401af73f4ca3310f, 0x3c52867b40ba77d6),
185 (0x401d7398d15e70db, 0x3c60fd4e0d4b1547),
186 (0x4020372fb36b87e2, 0x3c5c16c9ecc1621d),
187 (0x402208dbdae055ef, 0x3c56b81a36e75e8c),
188 (0x40244e6c595afdcc, 0xbc57c22045771848),
189 (0x4027398c57f3f1ad, 0x3c5970503be105c0),
190 (0x402b1d03c03d2f7f, 0xbc3f299d010aead2),
191 (0x403046e9fe60a77e, 0x3c5d2b61deff33ec),
192 (0x40345affed201b55, 0x3bf0e84d9567203a),
193 (0x403b267195b1ffae, 0xbbfad44b44b92653),
194 (0x40445e2455e4aaa7, 0xbc3296d577b5e21d),
195 (0x40545eed6854ce99, 0x3c02db53886013ca),
196 (0x0000000000000000, 0x0000000000000000),
197];
198
199#[cold]
200fn atan_refine(x: f64, a: f64) -> f64 {
201 const CH: [(u64, u64); 3] = [
202 (0xbfd5555555555555, 0xbc75555555555555),
203 (0x3fc999999999999a, 0xbc6999999999bcb8),
204 (0xbfc2492492492492, 0xbc6249242093c016),
205 ];
206 const CL: [u64; 4] = [
207 0x3fbc71c71c71c71c,
208 0xbfb745d1745d1265,
209 0x3fb3b13b115bcbc4,
210 0xbfb1107c41ad3253,
211 ];
212 let phi = f_fmla(a.abs(), f64::from_bits(0x40545f306dc9c883), 256.5).to_bits();
213 let i = ((phi >> (52 - 8)) & 0xff) as i64;
214 let h: DoubleDouble = if i == 128 {
215 DoubleDouble::from_quick_recip(-x)
216 } else {
217 let ta = f64::copysign(f64::from_bits(ATAN_REDUCE[i as usize].0), x);
218 let dzt = DoubleDouble::from_exact_mult(x, ta);
219 let zmta = x - ta;
220 let v = 1.0 + dzt.hi;
221 let d = 1.0 - v;
222 let ev = (d + dzt.hi) - ((d + v) - 1.0) + dzt.lo;
223 let r = 1.0 / v;
224 let rl = f_fmla(-ev, r, dd_fmla(r, -v, 1.0)) * r;
225 DoubleDouble::quick_mult_f64(DoubleDouble::new(rl, r), zmta)
226 };
227 let h2 = DoubleDouble::quick_mult(h, h);
228 let h3 = DoubleDouble::quick_mult(h, h2);
229 let h4 = h2.hi * h2.hi;
230
231 let zw0 = f_fmla(h2.hi, f64::from_bits(CL[3]), f64::from_bits(CL[2]));
232 let zw1 = f_fmla(h2.hi, f64::from_bits(CL[1]), f64::from_bits(CL[0]));
233 let zfl = f_fmla(h2.hi, zw1, h4 * zw0);
234
235 let mut f = poly_dd_3(h2, CH, zfl);
236 f = DoubleDouble::quick_mult(h3, f);
237 let (ah, mut az);
238 if i == 0 {
239 ah = h.hi;
240 az = f;
241 } else {
242 let mut df = 0.;
243 if i < 128 {
244 df = f64::copysign(1.0, x) * f64::from_bits(ATAN_REDUCE[i as usize].1);
245 }
246 let id = f64::copysign(i as f64, x);
247 ah = f64::from_bits(0x3f8921fb54442d00) * id;
248 az = DoubleDouble::new(
249 f64::from_bits(0xb97fc8f8cbb5bf80) * id,
250 f64::from_bits(0x3c88469898cc5180) * id,
251 );
252 az = DoubleDouble::add(az, DoubleDouble::new(0., df));
253 az = DoubleDouble::add(az, h);
254 az = DoubleDouble::add(az, f);
255 }
256 let v0 = DoubleDouble::from_exact_add(ah, az.hi);
257 let v1 = DoubleDouble::from_exact_add(v0.lo, az.lo);
258
259 v1.hi + v0.hi
260}
261
262pub fn f_atan(x: f64) -> f64 {
266 const CH: [u64; 4] = [
267 0x3ff0000000000000,
268 0xbfd555555555552b,
269 0x3fc9999999069c20,
270 0xbfc248d2c8444ac6,
271 ];
272 let t = x.to_bits();
273 let at = t & 0x7fffffffffffffff; let mut i = (at.wrapping_shr(51) as i64).wrapping_sub(2030i64); if at < 0x3f7b21c475e6362au64 {
276 if at == 0 {
278 return x;
279 } const CH2: [u64; 4] = [
281 0xbfd5555555555555,
282 0x3fc99999999998c1,
283 0xbfc249249176aec0,
284 0x3fbc711fd121ae80,
285 ];
286 if at < 0x3e40000000000000u64 {
287 return dyad_fmla(f64::from_bits(0xbc90000000000000), x, x);
291 }
292 let x2 = x * x;
293 let x3 = x * x2;
294 let x4 = x2 * x2;
295
296 let w0 = f_fmla(x2, f64::from_bits(CH2[3]), f64::from_bits(CH2[2]));
297 let w1 = f_fmla(x2, f64::from_bits(CH2[1]), f64::from_bits(CH2[0]));
298
299 let f = x3 * f_fmla(x4, w0, w1);
300 let ub = f_fmla(f, f64::from_bits(0x3cd2000000000000), f) + x;
301 let lb = f_fmla(-f, f64::from_bits(0x3cc4000000000000), f) + x;
302 if ub != lb {
304 return atan_refine(x, ub);
305 }
306 return ub;
307 }
308 let h;
309 let ah;
310 let mut al;
311 if at > 0x4062ded8e34a9035u64 {
312 ah = f64::copysign(f64::from_bits(0x3ff921fb54442d18), x);
314 al = f64::copysign(f64::from_bits(0x3c91a62633145c07), x);
315 if at >= 0x434d02967c31cdb5u64 {
316 if at > (0x7ffu64 << 52) {
318 return x + x;
319 } return ah + al;
321 }
322 h = -1.0 / x;
323 } else {
324 let u: u64 = t & 0x0007ffffffffffff;
326 let ut: u64 = u.wrapping_shr(51 - 16);
327 let ut2: u64 = (ut * ut).wrapping_shr(16);
328 let lc = ATAN_CIRCLE[i as usize];
329 i = (((lc[0] as u64)
330 .wrapping_shl(16)
331 .wrapping_add(ut * lc[1] as u64)
332 .wrapping_sub(ut2 * lc[2] as u64))
333 >> (16 + 9)) as i64;
334 let la = ATAN_REDUCE[i as usize];
335 let ta = f64::copysign(1.0, x) * f64::from_bits(la.0);
336 let id = f64::copysign(1.0, x) * i as f64;
337 al = dd_fmla(
338 f64::copysign(1.0, x),
339 f64::from_bits(la.1),
340 f64::from_bits(0x3c88469898cc5170) * id,
341 );
342 h = (x - ta) / f_fmla(x, ta, 1.0);
343 ah = f64::from_bits(0x3f8921fb54442d00) * id;
344 }
345 let h2 = h * h;
346 let h4 = h2 * h2;
347
348 let f0 = f_fmla(h2, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
349 let f1 = f_fmla(h2, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
350
351 let f = f_fmla(h4, f0, f1);
352 al = dd_fmla(h, f, al);
353 let ub = f_fmla(h, f64::from_bits(0x3ccf800000000000), al) + ah;
354 let lb = f_fmla(-h, f64::from_bits(0x3ccf800000000000), al) + ah;
355 if lb != ub {
357 return atan_refine(x, ub);
358 }
359 ub
360}
361
362#[cfg(test)]
363mod tests {
364 use super::*;
365
366 #[test]
367 fn atan_test() {
368 assert_eq!(f_atan(7.7877585082074305E-308), 7.78775850820743e-308);
369 assert_eq!(f_atan(0.0), 0.0);
370 assert_eq!(f_atan(1.0), 0.7853981633974483096156608458198);
371 assert_eq!(f_atan(35.9), 1.542948374599341097473183563168947);
372 assert_eq!(f_atan(-35.9), -1.542948374599341097473183563168947);
373 assert_eq!(f_atan(f64::INFINITY), 1.5707963267948966);
374 }
375}