// pxfm/triangle/hypot.rs

/*
 * // Copyright (c) Radzivon Bartoshyk 9/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29use crate::common::{dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use std::hint::black_box;
32
// case hypot(x,y) >= 2^1024
/// Cold overflow path: produces +inf and raises the overflow/inexact
/// floating-point exceptions by doubling the largest finite double.
#[cold]
fn hypot_overflow() -> f64 {
    // f64::MAX as a bit pattern; `black_box` prevents the compiler from
    // constant-folding the addition, so the exceptions are actually raised.
    let huge = f64::from_bits(0x7fefffffffffffff);
    black_box(huge) + black_box(huge)
}
39
/// Cold path for `f_hypot` when both |x| and |y| are subnormal.
///
/// `a` and `b` are the raw bit patterns of the two subnormal doubles
/// (exponent field is zero), i.e. the integers x*2^1074 and y*2^1074.
/// floor(sqrt(a^2+b^2)) is computed exactly in integer arithmetic, then
/// rounded according to the rounding mode active at runtime; the result
/// bit pattern is returned directly (still subnormal unless rounding
/// carries into the implicit bit).
#[cold]
fn hypot_denorm(a: u64, b: u64) -> f64 {
    let mut a = a;
    let mut b = b;
    // a, b < 2^52, so both convert to f64 exactly.
    let af = (a as i64) as f64;
    let bf = (b as i64) as f64;
    let mut underflow;
    // af and bf are x and y multiplied by 2^1074, thus integers
    // Work with 2a and 2b so one extra low bit (the round bit) is available.
    a = a.wrapping_shl(1);
    b = b.wrapping_shl(1);
    // Floating-point first guess of sqrt(a^2+b^2), refined exactly below.
    let mut rm = f_fmla(af, af, bf * bf).sqrt() as u64;
    let mut tm: i64 = rm.wrapping_shl(1) as i64;
    // Exact residual (2a)^2 + (2b)^2 - tm^2, tracked with wrapping arithmetic.
    let mut denom: i64 = a
        .wrapping_mul(a)
        .wrapping_add(b.wrapping_mul(b))
        .wrapping_sub((tm as u64).wrapping_mul(tm as u64)) as i64;
    // D = a^2+b^2 - tm^2
    while denom > 2 * tm {
        // tm too small
        denom -= 2 * tm + 1; // (tm+1)^2 = tm^2 + 2*tm + 1
        tm = tm.wrapping_add(1);
    }
    while denom < 0 {
        // tm too large
        denom += 2 * tm - 1; // (tm-1)^2 = tm^2 - 2*tm + 1
        tm = tm.wrapping_sub(1);
    }
    // tm = floor(sqrt(a^2+b^2)) and 0 <= D = a^2+b^2 - tm^2 < 2*tm+1
    // if D=0 and tm is even, the result is exact
    // if D=0 and tm is odd, the result is a midpoint
    let rb: i32 = (tm & 1) as i32; // round bit for rm
    let rb2: i32 = (denom >= tm) as i32; // round bit for tm
    let sb: i32 = (denom != 0) as i32; // sticky bit for rm
    rm = (tm >> 1) as u64; // truncate the low bit
    // Result is subnormal unless the mantissa reaches the implicit-bit position.
    underflow = rm < 0x10000000000000u64;
    if rb != 0 || sb != 0 {
        // Probe the active rounding mode: op == om only in round-to-nearest,
        // op > 1 only when rounding toward +inf.
        let op = 1.0 + f64::from_bits(0x3c90000000000000);
        let om = 1.0 - f64::from_bits(0x3c90000000000000);
        if op == om {
            // rounding to nearest
            if sb != 0 {
                rm = rm.wrapping_add(rb as u64);
                // we have no underflow when rm is now 2^52 and rb2 != 0
                // Remark: we cannot have a^2+b^2 = (tm+1/2)^2 exactly
                // since this would mean a^2+b^2 = tm^2+tm+1/4,
                // thus a^2+b^2 would be an odd multiple of 2^-1077
                // (since ulp(tm) = 2^-1075)
                if rm >> 52 != 0 && rb2 != 0 {
                    underflow = false;
                }
            } else {
                // sticky bit is 0, round bit is 1: underflow does not change
                rm += rm & 1; // even rounding
            }
        } else if op > 1.0 {
            // rounding upwards
            rm = rm.wrapping_add(1);
            // we have no underflow when rm is now 2^52 and tm was odd
            if (rm >> 52) != 0 && (tm & 1) != 0 {
                underflow = false;
            }
        }
        if underflow {
            // trigger underflow exception _after_ rounding for inexact results
            let trig_uf = black_box(f64::from_bits(0x0010000000000000));
            _ = black_box(black_box(trig_uf) * black_box(trig_uf)); // triggers underflow
        }
    }
    // else the result is exact, and we have no underflow
    f64::from_bits(rm)
}
111
/* Here the square root is refined by Newton iterations: x^2+y^2 is exact
and fits in a 128-bit integer, so the approximation is squared (which
also fits in a 128-bit integer), compared and adjusted if necessary using
the exact value of x^2+y^2. */
#[cold]
fn hypot_hard(x: f64, y: f64) -> f64 {
    // Probe the active rounding mode: op == om only in round-to-nearest.
    let op = 1.0 + f64::from_bits(0x3c90000000000000);
    let om = 1.0 - f64::from_bits(0x3c90000000000000);
    let mut xi = x.to_bits();
    let yi = y.to_bits();
    // Mantissas with the implicit leading bit restored.
    let mut bm = (xi & 0x000fffffffffffff) | (1u64 << 52);
    let mut lm = (yi & 0x000fffffffffffff) | (1u64 << 52);
    // Raw biased exponent fields of x and y.
    let be: i32 = (xi >> 52) as i32;
    let le: i32 = (yi >> 52) as i32;
    // Double-precision first guess for sqrt(x^2+y^2).
    let ri = f_fmla(x, x, y * y).sqrt().to_bits();
    const BS: u32 = 2;
    let mut rm: u64 = ri & 0x000fffffffffffff;
    let mut re: i32 = ((ri >> 52) as i32).wrapping_sub(0x3ff);
    rm |= 1u64 << 52;
    // Back the guess off by 3 ulps (handling the borrow across a binade
    // boundary) so the correction loop below only needs to step upward.
    for _ in 0..3 {
        if rm == (1u64 << 52) {
            rm = 0x001fffffffffffff;
            re = re.wrapping_sub(1);
        } else {
            rm = rm.wrapping_sub(1);
        }
    }
    bm = bm.wrapping_shl(BS);
    // m2 accumulates x^2+y^2 exactly, scaled to integer form.
    let mut m2: u64 = bm.wrapping_mul(bm);
    let de: i32 = be.wrapping_sub(le);
    let mut ls: i32 = (BS as i32).wrapping_sub(de);
    if ls >= 0 {
        lm <<= ls;
        m2 = m2.wrapping_add(lm.wrapping_mul(lm));
    } else {
        // y^2 must be shifted right; compute it in 128 bits to keep it exact.
        let lm2: u128 = (lm as u128).wrapping_mul(lm as u128);
        ls = ls.wrapping_mul(2);
        m2 = m2.wrapping_add((lm2 >> -ls) as u64); // since ls < 0, the shift by -ls is legitimate
        // Fold any shifted-out low bits into a sticky bit.
        m2 |= ((lm2.wrapping_shl((128 + ls) as u32)) != 0) as u64;
    }
    let k: i32 = (BS as i32).wrapping_add(re);
    let mut denom: i64;
    // Step rm upward until its scaled square reaches or passes m2.
    loop {
        // Advance by one ulp; by two once rm has carried into the next binade.
        rm += 1 + (rm >= (1u64 << 53)) as u64;
        let tm: u64 = rm.wrapping_shl(k as u32);
        let rm2: u64 = tm.wrapping_mul(tm);
        denom = (m2 as i64).wrapping_sub(rm2 as i64);
        if denom <= 0 {
            break;
        }
    }
    if denom != 0 {
        // Inexact result: fix the last bit according to the rounding mode.
        if op == om {
            // Round-to-nearest: compare m2 against the midpoint (rm minus
            // half an ulp; ssm accounts for the binade the mantissa is in).
            let ssm = if rm <= (1u64 << 53) { 1 } else { 0 };
            let tm: u64 = (rm << k) - (1 << (k - ssm));
            denom = (m2 as i64).wrapping_sub(tm.wrapping_mul(tm) as i64);
            if denom != 0 {
                // Not a midpoint: sign of the residual selects the neighbor.
                rm = rm.wrapping_add((denom >> 63) as u64);
            } else {
                // Exact midpoint: round to even.
                rm = rm.wrapping_sub(rm & 1);
            }
        } else {
            // Directed rounding: step down one ulp when op == 1 (rounding
            // toward -inf or zero), otherwise keep the upper candidate.
            let ssm = if rm > (1u64 << 53) { 1u32 } else { 0 };
            let pdm = if op == 1. { 1u64 } else { 0 };
            rm = rm.wrapping_sub(pdm.wrapping_shl(ssm));
        }
    }
    // Renormalize if rounding pushed the mantissa into the next binade.
    if rm >= (1u64 << 53) {
        rm >>= 1;
        re = re.wrapping_add(1);
    }

    // Reassemble the bit pattern from the corrected exponent and mantissa.
    let e: i64 = be.wrapping_sub(1).wrapping_add(re) as i64;
    xi = (e as u64).wrapping_shl(52).wrapping_add(rm);
    f64::from_bits(xi)
}
188
/// Computes hypot
///
/// Computes `sqrt(x*x + y*y)` without undue intermediate
/// overflow/underflow, handling NaN/Inf and subnormal inputs.
///
/// Max ULP 0.5
pub fn f_hypot(x: f64, y: f64) -> f64 {
    let xi = x.to_bits();
    let yi = y.to_bits();
    // Mask of the 11-bit exponent field.
    let emsk = 0x7ffu64 << 52;
    let mut ex = xi & emsk;
    let mut ey = yi & emsk;
    // hypot is even in both arguments: work with |x| and |y|.
    let mut x = x.abs();
    let mut y = y.abs();
    if ex == emsk || ey == emsk {
        /* Either x or y is NaN or Inf */
        // Shift out the sign bit so +/- variants compare equal.
        let wx = xi.wrapping_shl(1);
        let wy = yi.wrapping_shl(1);
        let wm = emsk.wrapping_shl(1);
        let ninf: i32 = ((wx == wm) ^ (wy == wm)) as i32;
        let nqnn: i32 = (((wx >> 52) == 0xfff) ^ ((wy >> 52) == 0xfff)) as i32;
        /* ninf is 1 when only one of x and y is +/-Inf
        nqnn is 1 when only one of x and y is qNaN
        IEEE 754 says that hypot(+/-Inf,qNaN)=hypot(qNaN,+/-Inf)=+Inf. */
        if ninf != 0 && nqnn != 0 {
            return if wx == wm { x * x } else { y * y };
        }
        return x + y; /* inf, nan */
    }

    // Order the operands so that u >= v.
    let u = f64::max(x, y);
    let v = f64::min(x, y);
    let mut xd = u.to_bits();
    let mut yd = v.to_bits();
    ey = yd;
    if (ey >> 52) == 0 {
        // y is subnormal
        if (yd) == 0 {
            // hypot(x, 0) = |x|.
            return f64::from_bits(xd);
        }
        ex = xd;
        if (ex >> 52) == 0 {
            // x is subnormal too
            if ex == 0 {
                return 0.;
            }
            // Both subnormal: exact integer slow path.
            return hypot_denorm(ex, ey);
        }
        // Normalize subnormal y into an ordinary (exponent, mantissa) encoding
        // so the exponent-difference test below works uniformly.
        let nz = ey.leading_zeros();
        ey = ey.wrapping_shl(nz - 11);
        ey &= u64::MAX >> 12;
        ey = ey.wrapping_sub(((nz as u64).wrapping_sub(12u64)) << 52);
        let t = ey;
        yd = t;
    }

    // If the exponents differ by more than 27, v only contributes a
    // sub-ulp correction: return u plus a 2^-27-scaled v via fma.
    let de = xd.wrapping_sub(yd);
    if de > (27u64 << 52) {
        // 0x3e40000000000000 is 2^-27.
        return dyad_fmla(f64::from_bits(0x3e40000000000000), v, u);
    }
    // Rescale both operands so u lands near 1.0; the exponent offset is
    // added back just before returning.
    let off: i64 = ((0x3ffu64 << 52) as i64).wrapping_sub((xd & emsk) as i64);
    xd = xd.wrapping_add(off as u64);
    yd = yd.wrapping_add(off as u64);
    x = f64::from_bits(xd);
    y = f64::from_bits(yd);
    // Squares represented as double-double (hi + lo) pairs.
    let x2 = DoubleDouble::from_exact_mult(x, x);
    let y2 = DoubleDouble::from_exact_mult(y, y);
    let r2 = x2.hi + y2.hi;
    let ir2 = 0.5 / r2;
    // dr2 = residual of x^2 + y^2 - r2 gathered from the low parts.
    let dr2 = ((x2.hi - r2) + y2.hi) + (x2.lo + y2.lo);
    // One refinement step on sqrt(r2) using the residual.
    let mut th = r2.sqrt();
    let rsqrt = DoubleDouble::from_exact_mult(th, ir2);
    let dz = dr2 - rsqrt.lo;
    let mut tl = rsqrt.hi * dz;
    let p = DoubleDouble::from_exact_add(th, tl);
    th = p.hi;
    tl = p.lo;
    let mut thd = th.to_bits();
    let tld = tl.abs().to_bits();
    ex = thd;
    ey = tld;
    ex &= 0x7ffu64 << 52;
    // Rounding test: when the low part tl sits too close to half an ulp of
    // th, the double-double result may round the last bit wrong — fall back
    // to the exact integer path.
    let aidr = ey.wrapping_add(0x3feu64 << 52).wrapping_sub(ex);
    let mid = (aidr.wrapping_sub(0x3c90000000000000).wrapping_add(16)) >> 5;
    if mid == 0 || aidr < 0x39b0000000000000u64 || aidr > 0x3c9fffffffffff80u64 {
        thd = hypot_hard(x, y).to_bits();
    }
    // Undo the exponent offset applied before squaring.
    thd = thd.wrapping_sub(off as u64);
    if thd >= (0x7ffu64 << 52) {
        return hypot_overflow();
    }
    f64::from_bits(thd)
}
279
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_hypot() {
        // (x, y, expected) triples: two subnormal-`y` cases plus one
        // ordinary case with an irrational result.
        let cases = [
            (
                2.8480945070593211E-306,
                2.1219950320804403E-314,
                2.848094507059321e-306,
            ),
            (
                3.5601181736115222E-307,
                1.0609978954826362E-314,
                3.560118173611524e-307,
            ),
            (2., 4., 4.47213595499958),
        ];
        for (x, y, expected) in cases {
            assert_eq!(f_hypot(x, y), expected);
        }
    }
}
295}