1use crate::common::f_fmla;
30use crate::double_double::DoubleDouble;
31use crate::polyeval::f_polyeval6;
32
33#[cold]
34fn sqrt1pm1_near_zero_hard(x: f64) -> f64 {
35 const C: [(u64, u64); 10] = [
36 (0x3af2bd7b2a000000, 0x3fe0000000000000),
37 (0xbb1e743d7fe00000, 0xbfc0000000000000),
38 (0xbc18d267496ae7f8, 0x3fb0000000000000),
39 (0x3c2d7a2a887e1af8, 0xbfa4000000000000),
40 (0xbc3a4965117e40c8, 0x3f9c00000000150b),
41 (0x3c3dc057cb5bf82c, 0xbf9500000000209e),
42 (0x3c02d0756979aa48, 0x3f907ffff9c1db3d),
43 (0xbbe30b3d9b8a1020, 0xbf8acffff10fbdbc),
44 (0xbc16d26d5f2efc04, 0x3f8659830d1bf014),
45 (0x3c212aabd12c483e, 0xbf82ff830f9799c4),
46 ];
47 let mut p = DoubleDouble::quick_mul_f64_add(
48 DoubleDouble::from_bit_pair(C[9]),
49 x,
50 DoubleDouble::from_bit_pair(C[8]),
51 );
52 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[7]));
53 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[6]));
54 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[5]));
55 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[4]));
56 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[3]));
57 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[2]));
58 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[1]));
59 p = DoubleDouble::quick_mul_f64_add(p, x, DoubleDouble::from_bit_pair(C[0]));
60 p = DoubleDouble::quick_mult_f64(p, x);
61 p.to_f64()
62}
63
64pub fn f_sqrt1pm1(x: f64) -> f64 {
66 let ix = x.to_bits();
67 let ux = ix.wrapping_shl(1);
68
69 if ux >= 0x7ffu64 << 53 || ux <= 0x7960000000000000u64 {
70 if ux == 0 {
72 return 0.;
74 }
75 if ux.wrapping_shl(11) == 0 {
76 return if x.is_sign_negative() {
78 f64::NAN
79 } else {
80 f64::INFINITY
81 };
82 }
83
84 if ux <= 0x7960000000000000u64 {
85 #[cfg(any(
87 all(
88 any(target_arch = "x86", target_arch = "x86_64"),
89 target_feature = "fma"
90 ),
91 all(target_arch = "aarch64", target_feature = "neon")
92 ))]
93 {
94 return f_fmla(-0.25 * x, 0.25 * x, x * 0.5);
95 }
96 #[cfg(not(any(
97 all(
98 any(target_arch = "x86", target_arch = "x86_64"),
99 target_feature = "fma"
100 ),
101 all(target_arch = "aarch64", target_feature = "neon")
102 )))]
103 {
104 use crate::common::dyad_fmla;
105 return if x < 1e-150 {
106 dyad_fmla(-0.25 * x, 0.25 * x, x * 0.5)
107 } else {
108 f_fmla(-0.25 * x, 0.25 * x, x * 0.5)
109 };
110 }
111 }
112
113 return f64::NAN; }
115
116 if (ix >> 63) != 0 && ux >= 0x7fe0000000000000u64 {
117 if ux == 0x7fe0000000000000u64 {
119 return -1.;
121 }
122 return f64::NAN;
123 }
124
125 if ux <= 0x7f1126e978d4fdf4u64 {
126 const C: [u64; 7] = [
133 0x3fe0000000000000,
134 0xbfc000000000009a,
135 0x3fb0000000000202,
136 0xbfa3fffffdf5a853,
137 0x3f9bfffffb3c75fe,
138 0xbf9500dd6aee8501,
139 0x3f9080d2ece21348,
140 ];
141 let p = f_polyeval6(
142 x,
143 f64::from_bits(C[1]),
144 f64::from_bits(C[2]),
145 f64::from_bits(C[3]),
146 f64::from_bits(C[4]),
147 f64::from_bits(C[5]),
148 f64::from_bits(C[6]),
149 ) * x;
150 let r = DoubleDouble::from_exact_add(f64::from_bits(C[0]), p);
151 let q = DoubleDouble::quick_mult_f64(r, x);
152 let err = f_fmla(
153 q.hi,
154 f64::from_bits(0x3c50000000000000), f64::from_bits(0x3bf0000000000000), );
157 let ub = q.hi + (q.lo + err);
158 let lb = q.hi + (q.lo - err);
159 if ub == lb {
161 return ub;
162 }
163 return sqrt1pm1_near_zero_hard(x);
164 }
165 #[cfg(any(
168 all(
169 any(target_arch = "x86", target_arch = "x86_64"),
170 target_feature = "fma"
171 ),
172 all(target_arch = "aarch64", target_feature = "neon")
173 ))]
174 {
175 let r = DoubleDouble::from_full_exact_add(x, 1.0);
176 let v_sqrt = r.fast_sqrt();
177 let q = DoubleDouble::full_add_f64(v_sqrt, -1.0);
178 q.to_f64()
179 }
180 #[cfg(not(any(
181 all(
182 any(target_arch = "x86", target_arch = "x86_64"),
183 target_feature = "fma"
184 ),
185 all(target_arch = "aarch64", target_feature = "neon")
186 )))]
187 {
188 use crate::double_double::two_product_compatible;
189 if !two_product_compatible(x) {
190 let r = x + 1.;
192 let v_sqrt = r.sqrt();
193 DoubleDouble::from_full_exact_sub(v_sqrt, 1.0).to_f64()
194 } else {
195 let r = DoubleDouble::from_full_exact_add(x, 1.0);
196 let v_sqrt = r.fast_sqrt();
197 let q = DoubleDouble::full_add_f64(v_sqrt, -1.0);
198 q.to_f64()
199 }
200 }
201}
202
203#[cfg(test)]
204mod tests {
205 use super::*;
206
207 #[test]
208 fn test_sqrt1pm1() {
209 assert_eq!(f_sqrt1pm1(15.), 3.0);
210 assert_eq!(f_sqrt1pm1(24.), 4.0);
211 assert_eq!(f_sqrt1pm1(8.), 2.0);
212 assert_eq!(f_sqrt1pm1(-0.75), -0.5);
213 assert_eq!(f_sqrt1pm1(0.5), 0.22474487139158905);
214 assert_eq!(f_sqrt1pm1(0.0005233212), 0.00026162637581973774);
215 assert_eq!(f_sqrt1pm1(-0.0005233212), -0.0002616948420951896);
216 assert_eq!(f_sqrt1pm1(-0.00000000000000000000005233212), -2.616606e-23);
217 assert_eq!(f_sqrt1pm1(0.00000000000000000000005233212), 2.616606e-23);
218 assert_eq!(f_sqrt1pm1(0.), 0.);
219 assert_eq!(f_sqrt1pm1(-0.), 0.);
220 assert_eq!(f_sqrt1pm1(f64::INFINITY), f64::INFINITY);
221 assert!(f_sqrt1pm1(f64::NEG_INFINITY).is_nan());
222 assert!(f_sqrt1pm1(f64::NAN).is_nan());
223 assert!(f_sqrt1pm1(-1.0001).is_nan());
224 }
225}