pxfm/tangent/
atan2pi.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::acospi::{INV_PI_DD, INV_PI_F128};
30use crate::common::f_fmla;
31use crate::double_double::DoubleDouble;
32use crate::dyadic_float::DyadicFloat128;
33use crate::rounding::CpuRound;
34use crate::tangent::atan2::{ATAN_I, atan_eval, atan2_hard};
35
36/// If one of arguments is too huge or too small, extended precision is required for
37/// case with big exponent difference
38#[cold]
39fn atan2pi_big_exp_difference_hard(
40    num: f64,
41    den: f64,
42    x_sign: usize,
43    y_sign: usize,
44    recip: bool,
45    final_sign: f64,
46) -> f64 {
47    const ZERO: DoubleDouble = DoubleDouble::new(0.0, 0.0);
48    const MZERO: DoubleDouble = DoubleDouble::new(-0.0, -0.0);
49    static CONST_ADJ_INV_PI: [[[DoubleDouble; 2]; 2]; 2] = [
50        [
51            [ZERO, DoubleDouble::new(0., -1. / 2.)],
52            [MZERO, DoubleDouble::new(0., -1. / 2.)],
53        ],
54        [
55            [DoubleDouble::new(0., -1.), DoubleDouble::new(0., 1. / 2.)],
56            [DoubleDouble::new(0., -1.), DoubleDouble::new(0., 1. / 2.)],
57        ],
58    ];
59    let const_term = CONST_ADJ_INV_PI[x_sign][y_sign][recip as usize];
60    let scaled_div = DyadicFloat128::from_div_f64(num, den) * INV_PI_F128;
61    let sign_f128 = DyadicFloat128::new_from_f64(final_sign);
62    let p = DyadicFloat128::new_from_f64(const_term.hi * final_sign);
63    let p1 = sign_f128 * (DyadicFloat128::new_from_f64(const_term.lo) + scaled_div);
64    let r = p + p1;
65    r.fast_as_f64()
66}
67
68/// Computes atan(x)/PI
69///
70/// Max found ULP 0.5
71pub fn f_atan2pi(y: f64, x: f64) -> f64 {
72    static IS_NEG: [f64; 2] = [1.0, -1.0];
73    const ZERO: DoubleDouble = DoubleDouble::new(0.0, 0.0);
74    const MZERO: DoubleDouble = DoubleDouble::new(-0.0, -0.0);
75    const PI: DoubleDouble = DoubleDouble::new(
76        f64::from_bits(0x3ca1a62633145c07),
77        f64::from_bits(0x400921fb54442d18),
78    );
79    const MPI: DoubleDouble = DoubleDouble::new(
80        f64::from_bits(0xbca1a62633145c07),
81        f64::from_bits(0xc00921fb54442d18),
82    );
83    const PI_OVER_2: DoubleDouble = DoubleDouble::new(
84        f64::from_bits(0x3c91a62633145c07),
85        f64::from_bits(0x3ff921fb54442d18),
86    );
87    const MPI_OVER_2: DoubleDouble = DoubleDouble::new(
88        f64::from_bits(0xbc91a62633145c07),
89        f64::from_bits(0xbff921fb54442d18),
90    );
91    const PI_OVER_4: DoubleDouble = DoubleDouble::new(
92        f64::from_bits(0x3c81a62633145c07),
93        f64::from_bits(0x3fe921fb54442d18),
94    );
95    const THREE_PI_OVER_4: DoubleDouble = DoubleDouble::new(
96        f64::from_bits(0x3c9a79394c9e8a0a),
97        f64::from_bits(0x4002d97c7f3321d2),
98    );
99
100    // Adjustment for constant term:
101    //   CONST_ADJ[x_sign][y_sign][recip]
102    static CONST_ADJ: [[[DoubleDouble; 2]; 2]; 2] = [
103        [[ZERO, MPI_OVER_2], [MZERO, MPI_OVER_2]],
104        [[MPI, PI_OVER_2], [MPI, PI_OVER_2]],
105    ];
106
107    let x_sign = x.is_sign_negative() as usize;
108    let y_sign = y.is_sign_negative() as usize;
109    let x_bits = x.to_bits() & 0x7fff_ffff_ffff_ffff;
110    let y_bits = y.to_bits() & 0x7fff_ffff_ffff_ffff;
111    let x_abs = x_bits;
112    let y_abs = y_bits;
113    let recip = x_abs < y_abs;
114    let mut min_abs = if recip { x_abs } else { y_abs };
115    let mut max_abs = if !recip { x_abs } else { y_abs };
116    let mut min_exp = min_abs.wrapping_shr(52);
117    let mut max_exp = max_abs.wrapping_shr(52);
118
119    let mut num = f64::from_bits(min_abs);
120    let mut den = f64::from_bits(max_abs);
121
122    // Check for exceptional cases, whether inputs are 0, inf, nan, or close to
123    // overflow, or close to underflow.
124    if max_exp > 0x7ffu64 - 128u64 || min_exp < 128u64 {
125        if x.is_nan() || y.is_nan() {
126            return f64::NAN;
127        }
128        let x_except = if x == 0.0 {
129            0
130        } else if x.is_infinite() {
131            2
132        } else {
133            1
134        };
135        let y_except = if y == 0.0 {
136            0
137        } else if y.is_infinite() {
138            2
139        } else {
140            1
141        };
142
143        // Exceptional cases:
144        //   EXCEPT[y_except][x_except][x_is_neg]
145        // with x_except & y_except:
146        //   0: zero
147        //   1: finite, non-zero
148        //   2: infinity
149        static EXCEPTS: [[[DoubleDouble; 2]; 3]; 3] = [
150            [[ZERO, PI], [ZERO, PI], [ZERO, PI]],
151            [[PI_OVER_2, PI_OVER_2], [ZERO, ZERO], [ZERO, PI]],
152            [
153                [PI_OVER_2, PI_OVER_2],
154                [PI_OVER_2, PI_OVER_2],
155                [PI_OVER_4, THREE_PI_OVER_4],
156            ],
157        ];
158
159        if (x_except != 1) || (y_except != 1) {
160            let mut r = EXCEPTS[y_except][x_except][x_sign];
161            r = DoubleDouble::quick_mult(r, INV_PI_DD);
162            return f_fmla(IS_NEG[y_sign], r.hi, IS_NEG[y_sign] * r.lo);
163        }
164        let scale_up = min_exp < 128u64;
165        let scale_down = max_exp > 0x7ffu64 - 128u64;
166        // At least one input is denormal, multiply both numerator and denominator
167        // by some large enough power of 2 to normalize denormal inputs.
168        if scale_up {
169            num *= f64::from_bits(0x43f0000000000000);
170            if !scale_down {
171                den *= f64::from_bits(0x43f0000000000000);
172            }
173        } else if scale_down {
174            den *= f64::from_bits(0x3bf0000000000000);
175            if !scale_up {
176                num *= f64::from_bits(0x3bf0000000000000);
177            }
178        }
179
180        min_abs = num.to_bits();
181        max_abs = den.to_bits();
182        min_exp = min_abs.wrapping_shr(52);
183        max_exp = max_abs.wrapping_shr(52);
184    }
185
186    let final_sign = IS_NEG[((x_sign != y_sign) != recip) as usize];
187    let const_term = CONST_ADJ[x_sign][y_sign][recip as usize];
188    let exp_diff = max_exp - min_exp;
189    // We have the following bound for normalized n and d:
190    //   2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1).
191    if exp_diff > 54 {
192        if max_exp >= 1075 || min_exp < 970 {
193            return atan2pi_big_exp_difference_hard(num, den, x_sign, y_sign, recip, final_sign);
194        }
195        let z = DoubleDouble::from_exact_mult(final_sign, const_term.hi);
196        let mut divided = DoubleDouble::from_exact_div(num, den);
197        divided = DoubleDouble::f64_add(const_term.lo, divided);
198        divided = DoubleDouble::quick_mult_f64(divided, final_sign);
199        let r = DoubleDouble::add(z, divided);
200        let p = DoubleDouble::quick_mult(INV_PI_DD, r);
201        return p.to_f64();
202    }
203
204    let mut k = (64.0 * num / den).cpu_round();
205    let idx = k as u64;
206    // k = idx / 64
207    k *= f64::from_bits(0x3f90000000000000);
208
209    // Range reduction:
210    // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64)))
211    //                        = atan((n - d * k/64)) / (d + n * k/64))
212    let num_k = DoubleDouble::from_exact_mult(num, k);
213    let den_k = DoubleDouble::from_exact_mult(den, k);
214
215    // num_dd = n - d * k
216    let num_dd = DoubleDouble::from_exact_add(num - den_k.hi, -den_k.lo);
217    // den_dd = d + n * k
218    let mut den_dd = DoubleDouble::from_exact_add(den, num_k.hi);
219    den_dd.lo += num_k.lo;
220
221    // q = (n - d * k) / (d + n * k)
222    let q = DoubleDouble::div(num_dd, den_dd);
223    // p ~ atan(q)
224    let p = atan_eval(q);
225
226    let vl = ATAN_I[idx as usize];
227    let vlo = DoubleDouble::from_bit_pair(vl);
228    let mut r = DoubleDouble::add(const_term, DoubleDouble::add(vlo, p));
229
230    r = DoubleDouble::quick_mult(r, INV_PI_DD);
231
232    let err = f_fmla(
233        p.hi,
234        f64::from_bits(0x3bd0000000000000),
235        f64::from_bits(0x3c00000000000000),
236    );
237
238    let ub = r.hi + (r.lo + err);
239    let lb = r.hi + (r.lo - err);
240
241    if ub == lb {
242        r.hi *= final_sign;
243        r.lo *= final_sign;
244
245        return r.to_f64();
246    }
247
248    (atan2_hard(y, x) * INV_PI_F128).fast_as_f64()
249}
250
251#[cfg(test)]
252mod tests {
253    use super::*;
254
255    #[test]
256    fn test_atan2pi() {
257        assert_eq!(
258            f_atan2pi(-0.000000000000010659658919444194, 2088960.4374061823),
259            -0.0000000000000000000016242886924270424
260        );
261        assert_eq!(f_atan2pi(-3.9999999981625933, 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003577142133480227), -0.5);
262        assert_eq!(f_atan2pi(0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000472842255026406,
263            0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008045886150098693
264        ),1.8706499392673612e-162);
265        assert_eq!(f_atan2pi(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002670088630208647,
266                             2.0000019071157054
267        ), 4.249573987697093e-307);
268        assert_eq!(f_atan2pi(-5., 2.), -0.3788810584091566);
269        assert_eq!(f_atan2pi(2., -5.), 0.8788810584091566);
270    }
271}
pxfm/tangent/atan2pi.rs

pxfm/tangent/
atan2pi.rs