pxfm/tangent/
atanf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 4/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{f_fmla, f_fmlaf};
30
31/// Computes atan
32///
33/// Max found ULP 0.49999973
34#[inline]
35pub fn f_atanf(x: f32) -> f32 {
36    const PI2: f64 = f64::from_bits(0x3ff921fb54442d18);
37    let t = x.to_bits();
38    let e = (t >> 23) & 0xff;
39    let gt = e >= 127;
40    let ta = t & 0x7fffffff;
41    if ta >= 0x4c700518u32 {
42        // |x| >= 6.29198e+07
43        if ta > 0x7f800000u32 {
44            return x + x;
45        } // nan
46        return f32::copysign(PI2 as f32, x); // inf or |x| >= 6.29198e+07
47    }
48    if e < 127 - 13 {
49        // |x| < 2^-13
50        if e < 127 - 25 {
51            // |x| < 2^-25
52            if t << 1 == 0 {
53                return x;
54            }
55            let res = f_fmlaf(-x, x.abs(), x);
56            return res;
57        }
58        return f_fmlaf(-f64::from_bits(0x3fd5555560000000) as f32 * x, x * x, x);
59    }
60    /* now |x| >= 0.00012207 */
61    let mut z = x as f64;
62    if gt {
63        z = 1.0 / z;
64    } /* gt is non-zero for |x| >= 1 */
65    let z2 = z * z;
66    let z4 = z2 * z2;
67    let z8 = z4 * z4;
68    /* polynomials generated using rminimax
69       (https://gitlab.inria.fr/sfilip/rminimax) with the following command:
70       ./ratapprox --function="atan(x)" --dom=[0.000122070,1] --num=[x,x^3,x^5,x^7,x^9,x^11,x^13] --den=[1,x^2,x^4,x^6,x^8,x^10,x^12] --output=atanf.sollya --log
71       (see output atanf.sollya)
72       The coefficient cd[0] was slightly reduced from the original value
73       0.330005 to avoid an exceptional case for |x| = 0.069052
74       and rounding to nearest.
75    */
76    const CN: [u64; 7] = [
77        0x3fd51eccde075d67,
78        0x3fea76bb5637f2f2,
79        0x3fe81e0eed20de88,
80        0x3fd376c8ca67d11d,
81        0x3faaec7b69202ac6,
82        0x3f69561899acc73e,
83        0x3efbf9fa5b67e600,
84    ];
85    const CD: [u64; 7] = [
86        0x3fd51eccde075d66,
87        0x3fedfbdd7b392d28,
88        0x3ff0000000000000,
89        0x3fdfd22bf0e89b54,
90        0x3fbd91ff8b576282,
91        0x3f8653ea99fc9bb0,
92        0x3f31e7fcc202340a,
93    ];
94    let mut cn0 = f_fmla(z2, f64::from_bits(CN[1]), f64::from_bits(CN[0]));
95    let cn2 = f_fmla(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2]));
96    let mut cn4 = f_fmla(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4]));
97    let cn6 = f64::from_bits(CN[6]);
98    cn0 = f_fmla(z4, cn2, cn0);
99    cn4 = f_fmla(z4, cn6, cn4);
100    cn0 = f_fmla(z8, cn4, cn0);
101    cn0 *= z;
102    let mut cd0 = f_fmla(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0]));
103    let cd2 = f_fmla(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2]));
104    let mut cd4 = f_fmla(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4]));
105    let cd6 = f64::from_bits(CD[6]);
106    cd0 = f_fmla(z4, cd2, cd0);
107    cd4 = f_fmla(z4, cd6, cd4);
108    cd0 = f_fmla(z8, cd4, cd0);
109    let r = cn0 / cd0;
110    if !gt {
111        return r as f32;
112    } /* for |x| < 1, (float) r is correctly rounded */
113
114    const PI_OVER2_H: f64 = f64::from_bits(0x3ff9000000000000);
115    const PI_OVER2_L: f64 = f64::from_bits(0x3f80fdaa22168c23);
116    /* now r approximates atan(1/x), we use atan(x) + atan(1/x) = sign(x)*pi/2,
117    where PI_OVER2_H + PI_OVER2_L approximates pi/2.
118    With sign(z)*L + (-r + sign(z)*H), it fails for x=0x1.98c252p+12 and
119    rounding upward.
120    With sign(z)*PI - r, where PI is a double approximation of pi to nearest,
121    it fails for x=0x1.ddf9f6p+0 and rounding upward. */
122    ((f64::copysign(PI_OVER2_L, z) - r) + f64::copysign(PI_OVER2_H, z)) as f32
123}
124
125#[cfg(test)]
126mod tests {
127    use super::*;
128
129    #[test]
130    fn f_atan_test() {
131        assert!(
132            (f_atanf(1.0) - std::f32::consts::PI / 4f32).abs() < 1e-6,
133            "Invalid result {}",
134            f_atanf(1f32)
135        );
136        assert!(
137            (f_atanf(2f32) - 1.107148717794090503017065f32).abs() < 1e-6,
138            "Invalid result {}",
139            f_atanf(2f32)
140        );
141        assert!(
142            (f_atanf(5f32) - 1.3734007669450158608612719264f32).abs() < 1e-6,
143            "Invalid result {}",
144            f_atanf(5f32)
145        );
146    }
147}