pxfm/triangle/
hypotf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::EXP_MASK_F32;
30
31/// Hypot function
32///
33/// Max ULP 0.5
34#[inline]
35pub fn f_hypotf(x: f32, y: f32) -> f32 {
36    let x_abs = f32::from_bits(x.to_bits() & 0x7fff_ffffu32);
37    let y_abs = f32::from_bits(y.to_bits() & 0x7fff_ffffu32);
38
39    let a_bits = x_abs.to_bits().max(y_abs.to_bits());
40    let b_bits = x_abs.to_bits().min(y_abs.to_bits());
41
42    let a_u = a_bits;
43    let b_u = b_bits;
44
45    if a_u >= EXP_MASK_F32 {
46        // x or y is inf or nan
47        if f32::from_bits(a_bits).is_nan() || f32::from_bits(b_bits).is_nan() {
48            return f32::NAN;
49        }
50        if f32::from_bits(a_bits).is_infinite() || f32::from_bits(b_bits).is_infinite() {
51            return f32::INFINITY;
52        }
53        return f32::from_bits(a_bits);
54    }
55
56    if a_u.wrapping_sub(b_u) >= ((23u32 + 2) << 23) {
57        return x_abs + y_abs;
58    }
59
60    #[cfg(any(
61        all(
62            any(target_arch = "x86", target_arch = "x86_64"),
63            target_feature = "fma"
64        ),
65        all(target_arch = "aarch64", target_feature = "neon")
66    ))]
67    {
68        let ad = x as f64;
69        let bd = y as f64;
70        use crate::common::f_fmla;
71        // for FMA environment we're using Kahan style summation which is short and reliable.
72        let w = bd * bd; // RN(bc)
73        let e = f_fmla(-bd, bd, w); // RN(w − bc)
74        let f = f_fmla(ad, ad, w); // RN(ad + w)
75        let r = e + f; // RN(f + e)
76        let hyp = r.sqrt(); // sqrt(x^2 + y^2)
77        hyp as f32
78    }
79    #[cfg(not(any(
80        all(
81            any(target_arch = "x86", target_arch = "x86_64"),
82            target_feature = "fma"
83        ),
84        all(target_arch = "aarch64", target_feature = "neon")
85    )))]
86    {
87        let ad = f32::from_bits(a_bits) as f64;
88        let bd = f32::from_bits(b_bits) as f64;
89        use crate::double_double::DoubleDouble;
90        let dy2 = DoubleDouble::from_exact_mult(bd, bd);
91        let fdx = DoubleDouble::from_exact_mult(ad, ad);
92        // elements are always sorted thus fdx.hi > dy2.hi, thus fasttwosum requirements is fulfilled
93        let f = DoubleDouble::add_f64(fdx, dy2.hi).to_f64();
94        let r = dy2.lo + f;
95        let cath = r.sqrt();
96        cath as f32
97    }
98}
99
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks tiny-magnitude inputs against a known result and small integers
    /// against the naive `f32` formula.
    #[test]
    fn test_hypotf() {
        // Both spellings of the tiny inputs below must produce this value.
        let expected: f32 = 0.000000000000000000000000000000000000011754944;

        let from_decimal = f_hypotf(
            0.000000000000000000000000000000000000000091771,
            0.000000000000000000000000000000000000011754585,
        );
        assert_eq!(from_decimal, expected);

        let from_scientific = f_hypotf(9.177e-41, 1.1754585e-38);
        assert_eq!(from_scientific, expected);

        // Equal legs: compare with the direct single-precision computation.
        for side in [1f32, 5f32] {
            let naive = (side * side + side * side).sqrt();
            let dx = (f_hypotf(side, side) - naive).abs();
            assert!(dx < 1e-5);
        }
    }

    /// Checks sign handling, zeros, infinities and NaN propagation.
    #[test]
    fn test_hypotf_edge_cases() {
        // (x, y, expected result)
        let exact_cases: [(f32, f32, f32); 8] = [
            (-1.0, -3.0, 3.1622777),
            (0.0, 0.0, 0.0),
            (f32::INFINITY, 0.0, f32::INFINITY),
            (0.0, f32::INFINITY, f32::INFINITY),
            (f32::INFINITY, f32::INFINITY, f32::INFINITY),
            (f32::NEG_INFINITY, 0.0, f32::INFINITY),
            (0.0, f32::NEG_INFINITY, f32::INFINITY),
            (f32::NEG_INFINITY, f32::NEG_INFINITY, f32::INFINITY),
        ];
        for (x, y, expected) in exact_cases {
            assert_eq!(f_hypotf(x, y), expected);
        }

        // NaN paired with a finite value yields NaN regardless of position.
        let nan_cases: [(f32, f32); 2] = [(f32::NAN, 1.0), (1.0, f32::NAN)];
        for (x, y) in nan_cases {
            assert!(f_hypotf(x, y).is_nan());
        }
    }
}