pxfm/tangent/
atan.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::shared_eval::poly_dd_3;
32
33pub(crate) static ATAN_CIRCLE: [[u16; 3]; 31] = [
34    [419, 81, 0],
35    [500, 81, 0],
36    [582, 163, 0],
37    [745, 163, 0],
38    [908, 326, 0],
39    [1234, 326, 0],
40    [1559, 651, 0],
41    [2210, 650, 1],
42    [2860, 1299, 3],
43    [4156, 1293, 4],
44    [5444, 2569, 24],
45    [7989, 2520, 32],
46    [10476, 4917, 168],
47    [15224, 4576, 200],
48    [19601, 8341, 838],
49    [27105, 6648, 731],
50    [33036, 10210, 1998],
51    [41266, 6292, 1117],
52    [46469, 7926, 2048],
53    [52375, 4038, 849],
54    [55587, 4591, 1291],
55    [58906, 2172, 479],
56    [60612, 2390, 688],
57    [62325, 1107, 247],
58    [63192, 1207, 349],
59    [64056, 556, 124],
60    [64491, 605, 175],
61    [64923, 278, 62],
62    [65141, 303, 88],
63    [65358, 139, 31],
64    [65467, 151, 44],
65];
66
67pub(crate) static ATAN_REDUCE: [(u64, u64); 129] = [
68    (0x0000000000000000, 0x0000000000000000),
69    (0x3f89224e047e368e, 0x3c1a3ca6c727c59d),
70    (0x3f992346247a91f0, 0x3bf138b0ef96a186),
71    (0x3fa2dbaae9a05db0, 0x3c436e7f8a3f5e42),
72    (0x3fa927278a3b1162, 0xbbfac986efb92662),
73    (0x3faf7495ea3f3783, 0x3c406ec8011ee816),
74    (0x3fb2e239ccff3831, 0xbc5858437d431332),
75    (0x3fb60b9f7597fdec, 0xbc3cebd13eb7c513),
76    (0x3fb936bb8c5b2da2, 0xbc5840cac0d81db5),
77    (0x3fbc63ce377fc802, 0x3c5400b0fdaa109e),
78    (0x3fbf93183a8db9e9, 0x3c40e04e06c86e72),
79    (0x3fc1626d85a91e70, 0x3c4f7ad829163ca7),
80    (0x3fc2fcac73a60640, 0xbc52680735ce2cd8),
81    (0x3fc4986a74cf4e57, 0xbc690559690b42e4),
82    (0x3fc635c990ce0d36, 0x3c591d29110b41aa),
83    (0x3fc7d4ec54fb5968, 0xbc4ea90e27182780),
84    (0x3fc975f5e0553158, 0xbc2dc82ac14e3e1c),
85    (0x3fcb1909efd8b762, 0xbc573a10fd13daaf),
86    (0x3fccbe4ceb4b4cf2, 0xbc63a7ffbeabda0b),
87    (0x3fce65e3f27c9f2a, 0xbc6db6627a24d523),
88    (0x3fd007fa758626ae, 0xbc645f97dd3099f6),
89    (0x3fd0de53475f3b3c, 0xbc66293f68741816),
90    (0x3fd1b6103d3597e9, 0xbc6ab240d40633e9),
91    (0x3fd28f459ecad74d, 0xbc2de34d14e832e0),
92    (0x3fd36a08355c63dc, 0x3c6af540d9fb4926),
93    (0x3fd4466d542bac92, 0x3c6da60fdbc82ac4),
94    (0x3fd5248ae1701b17, 0xbc792a601170138a),
95    (0x3fd604775fbb27df, 0xbc67f1fca1d5d15b),
96    (0x3fd6e649f7d78649, 0xbc64e223ea716c7b),
97    (0x3fd7ca1a832d0f84, 0x3c7b24c824ac51fc),
98    (0x3fd8b00196b3d022, 0x3c64314cd132ba43),
99    (0x3fd998188e816bf0, 0xbc711f1e0817879a),
100    (0x3fda827999fcef32, 0xbc6c3dea4dbad538),
101    (0x3fdb6f3fc8c61e5b, 0x3c660d1b780ee3eb),
102    (0x3fdc5e87185e67b6, 0xbc4ab5edb7dfa545),
103    (0x3fdd506c82a2c800, 0xbc68e1437048b5bd),
104    (0x3fde450e0d273e7a, 0xbc706951c97b050f),
105    (0x3fdf3c8ad985d9ee, 0xbc414af9522ab518),
106    (0x3fe01b819b5a7cf7, 0xbc7aba0d7d97d1f2),
107    (0x3fe09a4c59bd0d4d, 0x3c4095bc4ebc2c42),
108    (0x3fe11ab7190834ec, 0x3c8798826fa27774),
109    (0x3fe19cd3fe8e405d, 0x3c8008f6258fc98f),
110    (0x3fe220b5ef047825, 0xbc5462af7ceb7de6),
111    (0x3fe2a6709a74f289, 0xbc71184dfd78b472),
112    (0x3fe32e1889047ffd, 0x3c79141876dc40c5),
113    (0x3fe3b7c3289ed6f3, 0x3c8481c20189726c),
114    (0x3fe44386db9ce5db, 0x3c82e851bd025441),
115    (0x3fe4d17b087b265d, 0x3c713ada9b8bc419),
116    (0x3fe561b82ab7f990, 0xbc805b4c3c4cbee8),
117    (0x3fe5f457e4f4812e, 0xbc85619249bd96f1),
118    (0x3fe6897514751db6, 0xbc6b0a0fbcafc671),
119    (0x3fe7212be621be6d, 0xbc819ff2dc66da45),
120    (0x3fe7bb99ed2990cf, 0x3c81320449592d92),
121    (0x3fe858de3b716571, 0xbc81fddcd2f3da8e),
122    (0x3fe8f9197bf85eeb, 0x3c6d44a42e35cc97),
123    (0x3fe99c6e0f634394, 0xbc7585a178b4a18d),
124    (0x3fea43002ae42850, 0x3c6f95a531b3a970),
125    (0x3feaecf5f9ba35a6, 0xbc396c2d43ca3392),
126    (0x3feb9a77c18c1af2, 0xbc6a5bed94b05def),
127    (0x3fec4bb009e77983, 0x3c454509d2bff511),
128    (0x3fed00cbc7384d2e, 0xbc6b4c867cef300c),
129    (0x3fedb9fa89953fcf, 0xbc1ddfac663d6bc6),
130    (0x3fee776eafc91706, 0xbc7a510683ff7cb6),
131    (0x3fef395d9f0e3c92, 0x3c44fdcd8e4e8710),
132    (0x3ff0000000000000, 0x0000000000000000),
133    (0x3ff065c900aaf2d8, 0xbc8deec7fc9042ad),
134    (0x3ff0ce29d0883c99, 0xbc8395ae45e0657d),
135    (0x3ff139447e6a86ee, 0x3c8332cf301a97f3),
136    (0x3ff1a73d55278c4b, 0xbc86cc8c4b78213b),
137    (0x3ff2183b0c4573ff, 0x3c870a90841da57a),
138    (0x3ff28c66fdaf8f09, 0xbc6ba39bad450ee0),
139    (0x3ff303ed61109e20, 0xbc88692946d9f93c),
140    (0x3ff37efd8d87607e, 0x3c63b711bf765b58),
141    (0x3ff3fdca42847507, 0x3c7c21387985b081),
142    (0x3ff48089f8bf42cc, 0xbc87ddb19d3d0efc),
143    (0x3ff507773c537ead, 0xbc7f5e354cf971f3),
144    (0x3ff592d11142fa55, 0xbc700f0ad675330d),
145    (0x3ff622db63c8ecc2, 0xbc82c93f50ab2c0e),
146    (0x3ff6b7df86265200, 0x3c7bec391adc37d5),
147    (0x3ff7522cbdd428a8, 0xbc69686ddc9ffcf5),
148    (0x3ff7f218e25a7461, 0xbc78d16529514246),
149    (0x3ff89801106cc709, 0xbc8092f51e9c2803),
150    (0x3ff9444a7462122a, 0xbc807c06755404c4),
151    (0x3ff9f7632fa9e871, 0x3c802e0d43abc92b),
152    (0x3ffab1c35d8a74ea, 0x3c5d0184e48af6f7),
153    (0x3ffb73ee3c3ef16a, 0x3c773be957380bc2),
154    (0x3ffc3e738086bc0f, 0xbc702b6e26c84462),
155    (0x3ffd11f0dae40609, 0x3c525c4f3ffa6e1f),
156    (0x3ffdef13b73c1406, 0xbc5e302db3c6823f),
157    (0x3ffed69b4153a45d, 0x3c73207830326c0e),
158    (0x3fffc95abad6cf4a, 0xbc66308cee7927bf),
159    (0x4000641e192ceab3, 0xbc70147ebf0df4c5),
160    (0x4000ea21d716fbf7, 0xbc7168533cc41d8b),
161    (0x40017749711a6679, 0xbc652a0b0333e9c5),
162    (0x40020c36c6a7f38e, 0x3c68659eece35395),
163    (0x4002a99f50fd4f4f, 0x3c820fcad18cb36f),
164    (0x4003504f333f9de6, 0xbc752afdbd5a8c74),
165    (0x4004012ce2586a17, 0xbc79747a792907d7),
166    (0x4004bd3d87fe0650, 0x3c790c59393b52c8),
167    (0x400585aa4e1530fa, 0x3c7af6934f13a3a8),
168    (0x40065bc6cc825147, 0xbc48534dcab5ad3e),
169    (0x40074118e4b6a7c8, 0xbc7555aa8bfca9a1),
170    (0x400837626d70fdb8, 0xbc556b3fee9ca72b),
171    (0x400940ad30abc792, 0x3c54b3fdd4fdc06c),
172    (0x400a5f59e90600dd, 0x3c6285d367c55ddc),
173    (0x400b9633283b6d14, 0xbc48712976f17a16),
174    (0x400ce885653127e7, 0xbc3abe8ab65d49fc),
175    (0x400e5a3de972a377, 0x3c5cd9be81ad764b),
176    (0x400ff01305ecd8dc, 0x3c4742c2922656fa),
177    (0x4010d7dc7cff4c9e, 0xbc77c842978bee09),
178    (0x4011d0143e71565f, 0x3c67bc7dea7c3c03),
179    (0x4012e4ff1626b949, 0x3c4aefbe25b404e9),
180    (0x40141bfee2424771, 0xbc34bcfaaa95cb2c),
181    (0x40157be4eaa5e11b, 0x3c50fe741e4ec679),
182    (0x40170d751908c1b1, 0x3c5fe74a5b0ec709),
183    (0x4018dc25c117782b, 0x3c50ca1c19f710ef),
184    (0x401af73f4ca3310f, 0x3c52867b40ba77d6),
185    (0x401d7398d15e70db, 0x3c60fd4e0d4b1547),
186    (0x4020372fb36b87e2, 0x3c5c16c9ecc1621d),
187    (0x402208dbdae055ef, 0x3c56b81a36e75e8c),
188    (0x40244e6c595afdcc, 0xbc57c22045771848),
189    (0x4027398c57f3f1ad, 0x3c5970503be105c0),
190    (0x402b1d03c03d2f7f, 0xbc3f299d010aead2),
191    (0x403046e9fe60a77e, 0x3c5d2b61deff33ec),
192    (0x40345affed201b55, 0x3bf0e84d9567203a),
193    (0x403b267195b1ffae, 0xbbfad44b44b92653),
194    (0x40445e2455e4aaa7, 0xbc3296d577b5e21d),
195    (0x40545eed6854ce99, 0x3c02db53886013ca),
196    (0x0000000000000000, 0x0000000000000000),
197];
198
199#[cold]
200fn atan_refine(x: f64, a: f64) -> f64 {
201    const CH: [(u64, u64); 3] = [
202        (0xbfd5555555555555, 0xbc75555555555555),
203        (0x3fc999999999999a, 0xbc6999999999bcb8),
204        (0xbfc2492492492492, 0xbc6249242093c016),
205    ];
206    const CL: [u64; 4] = [
207        0x3fbc71c71c71c71c,
208        0xbfb745d1745d1265,
209        0x3fb3b13b115bcbc4,
210        0xbfb1107c41ad3253,
211    ];
212    let phi = f_fmla(a.abs(), f64::from_bits(0x40545f306dc9c883), 256.5).to_bits();
213    let i = ((phi >> (52 - 8)) & 0xff) as i64;
214    let h: DoubleDouble = if i == 128 {
215        DoubleDouble::from_quick_recip(-x)
216    } else {
217        let ta = f64::copysign(f64::from_bits(ATAN_REDUCE[i as usize].0), x);
218        let dzt = DoubleDouble::from_exact_mult(x, ta);
219        let zmta = x - ta;
220        let v = 1.0 + dzt.hi;
221        let d = 1.0 - v;
222        let ev = (d + dzt.hi) - ((d + v) - 1.0) + dzt.lo;
223        let r = 1.0 / v;
224        let rl = f_fmla(-ev, r, dd_fmla(r, -v, 1.0)) * r;
225        DoubleDouble::quick_mult_f64(DoubleDouble::new(rl, r), zmta)
226    };
227    let h2 = DoubleDouble::quick_mult(h, h);
228    let h3 = DoubleDouble::quick_mult(h, h2);
229    let h4 = h2.hi * h2.hi;
230
231    let zw0 = f_fmla(h2.hi, f64::from_bits(CL[3]), f64::from_bits(CL[2]));
232    let zw1 = f_fmla(h2.hi, f64::from_bits(CL[1]), f64::from_bits(CL[0]));
233    let zfl = f_fmla(h2.hi, zw1, h4 * zw0);
234
235    let mut f = poly_dd_3(h2, CH, zfl);
236    f = DoubleDouble::quick_mult(h3, f);
237    let (ah, mut az);
238    if i == 0 {
239        ah = h.hi;
240        az = f;
241    } else {
242        let mut df = 0.;
243        if i < 128 {
244            df = f64::copysign(1.0, x) * f64::from_bits(ATAN_REDUCE[i as usize].1);
245        }
246        let id = f64::copysign(i as f64, x);
247        ah = f64::from_bits(0x3f8921fb54442d00) * id;
248        az = DoubleDouble::new(
249            f64::from_bits(0xb97fc8f8cbb5bf80) * id,
250            f64::from_bits(0x3c88469898cc5180) * id,
251        );
252        az = DoubleDouble::add(az, DoubleDouble::new(0., df));
253        az = DoubleDouble::add(az, h);
254        az = DoubleDouble::add(az, f);
255    }
256    let v0 = DoubleDouble::from_exact_add(ah, az.hi);
257    let v1 = DoubleDouble::from_exact_add(v0.lo, az.lo);
258
259    v1.hi + v0.hi
260}
261
262/// Computes atan in double precision
263///
264/// ULP 0.5
265pub fn f_atan(x: f64) -> f64 {
266    const CH: [u64; 4] = [
267        0x3ff0000000000000,
268        0xbfd555555555552b,
269        0x3fc9999999069c20,
270        0xbfc248d2c8444ac6,
271    ];
272    let t = x.to_bits();
273    let at = t & 0x7fffffffffffffff; // at encodes |x|
274    let mut i = (at.wrapping_shr(51) as i64).wrapping_sub(2030i64); // -2030 <= i <= 2065
275    if at < 0x3f7b21c475e6362au64 {
276        // |x| < 0x1.b21c475e6362ap-8
277        if at == 0 {
278            return x;
279        } // atan(+/-0) = +/-0
280        const CH2: [u64; 4] = [
281            0xbfd5555555555555,
282            0x3fc99999999998c1,
283            0xbfc249249176aec0,
284            0x3fbc711fd121ae80,
285        ];
286        if at < 0x3e40000000000000u64 {
287            // |x| < 0x1p-27
288            /* underflow when 0 < |x| < 2^-1022 or when |x| = 2^-1022
289            and rounding towards zero. */
290            return dyad_fmla(f64::from_bits(0xbc90000000000000), x, x);
291        }
292        let x2 = x * x;
293        let x3 = x * x2;
294        let x4 = x2 * x2;
295
296        let w0 = f_fmla(x2, f64::from_bits(CH2[3]), f64::from_bits(CH2[2]));
297        let w1 = f_fmla(x2, f64::from_bits(CH2[1]), f64::from_bits(CH2[0]));
298
299        let f = x3 * f_fmla(x4, w0, w1);
300        let ub = f_fmla(f, f64::from_bits(0x3cd2000000000000), f) + x;
301        let lb = f_fmla(-f, f64::from_bits(0x3cc4000000000000), f) + x;
302        // Ziv's accuracy test
303        if ub != lb {
304            return atan_refine(x, ub);
305        }
306        return ub;
307    }
308    let h;
309    let ah;
310    let mut al;
311    if at > 0x4062ded8e34a9035u64 {
312        // |x| > 0x4062ded8e34a9035
313        ah = f64::copysign(f64::from_bits(0x3ff921fb54442d18), x);
314        al = f64::copysign(f64::from_bits(0x3c91a62633145c07), x);
315        if at >= 0x434d02967c31cdb5u64 {
316            // |x| >= 1.63312e+16
317            if at > (0x7ffu64 << 52) {
318                return x + x;
319            } // NaN
320            return ah + al;
321        }
322        h = -1.0 / x;
323    } else {
324        // now 0.006624 <= |x| <= 150.964 thus 1<=i<=30
325        let u: u64 = t & 0x0007ffffffffffff;
326        let ut: u64 = u.wrapping_shr(51 - 16);
327        let ut2: u64 = (ut * ut).wrapping_shr(16);
328        let lc = ATAN_CIRCLE[i as usize];
329        i = (((lc[0] as u64)
330            .wrapping_shl(16)
331            .wrapping_add(ut * lc[1] as u64)
332            .wrapping_sub(ut2 * lc[2] as u64))
333            >> (16 + 9)) as i64;
334        let la = ATAN_REDUCE[i as usize];
335        let ta = f64::copysign(1.0, x) * f64::from_bits(la.0);
336        let id = f64::copysign(1.0, x) * i as f64;
337        al = dd_fmla(
338            f64::copysign(1.0, x),
339            f64::from_bits(la.1),
340            f64::from_bits(0x3c88469898cc5170) * id,
341        );
342        h = (x - ta) / f_fmla(x, ta, 1.0);
343        ah = f64::from_bits(0x3f8921fb54442d00) * id;
344    }
345    let h2 = h * h;
346    let h4 = h2 * h2;
347
348    let f0 = f_fmla(h2, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
349    let f1 = f_fmla(h2, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
350
351    let f = f_fmla(h4, f0, f1);
352    al = dd_fmla(h, f, al);
353    let ub = f_fmla(h, f64::from_bits(0x3ccf800000000000), al) + ah;
354    let lb = f_fmla(-h, f64::from_bits(0x3ccf800000000000), al) + ah;
355    // Ziv's accuracy test
356    if lb != ub {
357        return atan_refine(x, ub);
358    }
359    ub
360}
361
362#[cfg(test)]
363mod tests {
364    use super::*;
365
366    #[test]
367    fn atan_test() {
368        assert_eq!(f_atan(7.7877585082074305E-308), 7.78775850820743e-308);
369        assert_eq!(f_atan(0.0), 0.0);
370        assert_eq!(f_atan(1.0), 0.7853981633974483096156608458198);
371        assert_eq!(f_atan(35.9), 1.542948374599341097473183563168947);
372        assert_eq!(f_atan(-35.9), -1.542948374599341097473183563168947);
373        assert_eq!(f_atan(f64::INFINITY), 1.5707963267948966);
374    }
375}