pxfm/square_root/
rsqrt.rs1use crate::double_double::DoubleDouble;
30
31pub fn f_rsqrt(x: f64) -> f64 {
35 let ix = x.to_bits();
36 let r: f64 = if ix < 1u64 << 52 {
37 if ix != 0 {
39 x.sqrt() / x
41 } else {
42 return f64::INFINITY; }
44 } else if ix >= 0x7ffu64 << 52 {
45 if ix.wrapping_shl(1) == 0 {
47 return f64::NEG_INFINITY; }
49 if ix > 0xfff0000000000000u64 {
50 return x + x;
51 } if (ix >> 63) != 0 {
53 return f64::NAN;
55 }
56 if ix.wrapping_shl(12) == 0 {
57 return 0.0;
58 } return x + x; } else {
61 if ix > 0x7fd000000000000u64 {
63 (4.0 / x) * (0.25 * x.sqrt())
66 } else {
67 (1.0 / x) * x.sqrt()
68 }
69 };
70
71 #[cfg(any(
72 all(
73 any(target_arch = "x86", target_arch = "x86_64"),
74 target_feature = "fma"
75 ),
76 all(target_arch = "aarch64", target_feature = "neon")
77 ))]
78 {
79 let d2x = DoubleDouble::from_exact_mult(r, x);
80 use crate::common::f_fmla;
81 let h = f_fmla(r, d2x.lo, f_fmla(r, d2x.hi, -1.0));
82 let dr = (r * 0.5) * h;
83 r - dr
84 }
85 #[cfg(not(any(
86 all(
87 any(target_arch = "x86", target_arch = "x86_64"),
88 target_feature = "fma"
89 ),
90 all(target_arch = "aarch64", target_feature = "neon")
91 )))]
92 {
93 use crate::double_double::two_product_compatible;
94 if !two_product_compatible(x) {
95 recip_hard_dyadic(x, r)
96 } else {
97 let d2x = DoubleDouble::from_exact_mult(r, x);
98 let DoubleDouble { hi: h, lo: pr } = DoubleDouble::quick_mult_f64(d2x, r);
99 let DoubleDouble { hi: p, lo: q } = DoubleDouble::from_full_exact_add(-1.0, h);
100 let h = DoubleDouble::from_exact_add(p, pr + q);
101 let dr = DoubleDouble::quick_mult_f64(h, r * 0.5);
102 r - dr.hi - dr.lo
103 }
104 }
105}
106
107#[cfg(not(any(
108 all(
109 any(target_arch = "x86", target_arch = "x86_64"),
110 target_feature = "fma"
111 ),
112 all(target_arch = "aarch64", target_feature = "neon")
113)))]
114#[cold]
115#[inline(never)]
116fn recip_hard_dyadic(x: f64, r: f64) -> f64 {
117 use crate::dyadic_float::{DyadicFloat128, DyadicSign};
118 let dx = DyadicFloat128::new_from_f64(x);
119 let dr = DyadicFloat128::new_from_f64(r);
120 const M_ONE: DyadicFloat128 = DyadicFloat128 {
121 sign: DyadicSign::Neg,
122 exponent: -127,
123 mantissa: 0x80000000_00000000_00000000_00000000_u128,
124 };
125 let d2 = dx * dr;
126 let h = d2 * dr + M_ONE;
127 let mut half_dr = dr;
128 half_dr.exponent -= 1; let ddr = half_dr * h;
130 (dr - ddr).fast_as_f64()
131}
132
133#[cfg(test)]
134mod tests {
135 use super::*;
136
137 #[test]
138 fn test_rsqrt() {
139 assert_eq!(f_rsqrt(7518001163502890000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.),
140 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011533172976634968);
141 assert_eq!(f_rsqrt(0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001984274103353),
142 709903255474595300000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.);
143 assert_eq!(f_rsqrt(0.0), f64::INFINITY);
144 assert_eq!(f_rsqrt(4.0), 0.5);
145 assert_eq!(f_rsqrt(9.0), 1. / 3.);
146 assert_eq!(f_rsqrt(-0.0), f64::NEG_INFINITY);
147 assert!(f_rsqrt(f64::NAN).is_nan());
148 }
149}