pxfm/square_root/
sqrt1pm1f.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::polyeval::f_polyeval6;
30
31/// Computes sqrt(1+x) - 1
32pub fn f_sqrt1pm1f(x: f32) -> f32 {
33    // filter out exceptional cases
34    let ix = x.to_bits();
35    let ux = ix.wrapping_shl(1);
36    if ux >= 0xffu32 << 24 || ux <= 0x68000000u32 {
37        // |x| <= f32::EPSILON, |x| == inf, x == NaN
38        if ux == 0 {
39            // |x| == 0
40            return 0.;
41        }
42        if ux.wrapping_shl(8) == 0 {
43            // |x| == Inf
44            return if x.is_sign_negative() {
45                f32::NAN
46            } else {
47                f32::INFINITY
48            };
49        }
50
51        if ux <= 0x68000000u32 {
52            // |x| <= f32::EPSILON
53            // Sqrt(1+x) - 1 ~ x/2-x^2/8 + O(x^3)
54            #[cfg(any(
55                all(
56                    any(target_arch = "x86", target_arch = "x86_64"),
57                    target_feature = "fma"
58                ),
59                all(target_arch = "aarch64", target_feature = "neon")
60            ))]
61            {
62                use crate::common::f_fmlaf;
63                return f_fmlaf(-0.25 * x, 0.25 * x, x * 0.5);
64            }
65            #[cfg(not(any(
66                all(
67                    any(target_arch = "x86", target_arch = "x86_64"),
68                    target_feature = "fma"
69                ),
70                all(target_arch = "aarch64", target_feature = "neon")
71            )))]
72            {
73                use crate::common::f_fmla;
74                let dx = x as f64;
75                return f_fmla(-0.25 * dx, 0.25 * dx, dx * 0.5) as f32;
76            }
77        }
78
79        return f32::NAN; // x == NaN
80    }
81    if (ix >> 31) != 0 && ux >= 0x7f00_0000 {
82        // x < 0 and x >= 1
83        if ux == 0x7f00_0000u32 {
84            // x == -1
85            return -1.;
86        }
87        return f32::NAN;
88    }
89
90    if ux <= 0x78eb_851eu32 {
91        // |x| <= 0.015
92
93        // Polynomial generated by Sollya:
94        // d = [-0.015, 0.015];
95        // sqrt1pm1 = sqrt(x + 1) - 1;
96        // Q = fpminimax(sqrt1pm1, [|1,2,3,4,5,6|], [|D...|], d);
97        const C: [u64; 6] = [
98            0x3fe0000000000034,
99            0xbfc00000000002ee,
100            0x3faffffffc0d541c,
101            0xbfa3fffffa546ce5,
102            0x3f9c016d2be05c25,
103            0xbf95016d1ae9e058,
104        ];
105        let dx = x as f64;
106        let p = f_polyeval6(
107            dx,
108            f64::from_bits(C[0]),
109            f64::from_bits(C[1]),
110            f64::from_bits(C[2]),
111            f64::from_bits(C[3]),
112            f64::from_bits(C[4]),
113            f64::from_bits(C[5]),
114        ) * dx;
115        return p as f32;
116    }
117
118    let dx = x as f64;
119    ((dx + 1.).sqrt() - 1.) as f32
120}
121
122#[cfg(test)]
123mod tests {
124    use super::*;
125
126    #[test]
127    fn test_sqrt1pm1f() {
128        assert_eq!(f_sqrt1pm1f(-0.9), -0.6837722);
129        assert_eq!(f_sqrt1pm1f(24.), 4.);
130        assert_eq!(f_sqrt1pm1f(-0.75), -0.5);
131        assert_eq!(f_sqrt1pm1f(8.), 2.);
132        assert_eq!(f_sqrt1pm1f(9.), 2.1622777);
133        assert_eq!(f_sqrt1pm1f(12.31), 2.6482873);
134        assert_eq!(f_sqrt1pm1f(0.005231), 0.0026120886);
135        assert_eq!(f_sqrt1pm1f(0.00000000005231), 2.6155e-11);
136        assert_eq!(f_sqrt1pm1f(-0.00000000005231), -2.6155e-11);
137        assert_eq!(f_sqrt1pm1f(0.), 0.);
138        assert_eq!(f_sqrt1pm1f(-0.), 0.);
139        assert_eq!(f_sqrt1pm1f(f32::INFINITY), f32::INFINITY);
140        assert!(f_sqrt1pm1f(f32::NEG_INFINITY).is_nan());
141        assert!(f_sqrt1pm1f(f32::NAN).is_nan());
142        assert!(f_sqrt1pm1f(-1.0001).is_nan());
143    }
144}