1use crate::common::{dd_fmla, f_fmla};
30use std::hint::black_box;
31
32static ERR0: [u64; 128] = [
33 0x3ff0000000000000,
34 0x3ff0163da9fb3335,
35 0x3ff02c9a3e778061,
36 0x3ff04315e86e7f85,
37 0x3ff059b0d3158574,
38 0x3ff0706b29ddf6de,
39 0x3ff0874518759bc8,
40 0x3ff09e3ecac6f383,
41 0x3ff0b5586cf9890f,
42 0x3ff0cc922b7247f7,
43 0x3ff0e3ec32d3d1a2,
44 0x3ff0fb66affed31b,
45 0x3ff11301d0125b51,
46 0x3ff12abdc06c31cc,
47 0x3ff1429aaea92de0,
48 0x3ff15a98c8a58e51,
49 0x3ff172b83c7d517b,
50 0x3ff18af9388c8dea,
51 0x3ff1a35beb6fcb75,
52 0x3ff1bbe084045cd4,
53 0x3ff1d4873168b9aa,
54 0x3ff1ed5022fcd91d,
55 0x3ff2063b88628cd6,
56 0x3ff21f49917ddc96,
57 0x3ff2387a6e756238,
58 0x3ff251ce4fb2a63f,
59 0x3ff26b4565e27cdd,
60 0x3ff284dfe1f56381,
61 0x3ff29e9df51fdee1,
62 0x3ff2b87fd0dad990,
63 0x3ff2d285a6e4030b,
64 0x3ff2ecafa93e2f56,
65 0x3ff306fe0a31b715,
66 0x3ff32170fc4cd831,
67 0x3ff33c08b26416ff,
68 0x3ff356c55f929ff1,
69 0x3ff371a7373aa9cb,
70 0x3ff38cae6d05d866,
71 0x3ff3a7db34e59ff7,
72 0x3ff3c32dc313a8e5,
73 0x3ff3dea64c123422,
74 0x3ff3fa4504ac801c,
75 0x3ff4160a21f72e2a,
76 0x3ff431f5d950a897,
77 0x3ff44e086061892d,
78 0x3ff46a41ed1d0057,
79 0x3ff486a2b5c13cd0,
80 0x3ff4a32af0d7d3de,
81 0x3ff4bfdad5362a27,
82 0x3ff4dcb299fddd0d,
83 0x3ff4f9b2769d2ca7,
84 0x3ff516daa2cf6642,
85 0x3ff5342b569d4f82,
86 0x3ff551a4ca5d920f,
87 0x3ff56f4736b527da,
88 0x3ff58d12d497c7fd,
89 0x3ff5ab07dd485429,
90 0x3ff5c9268a5946b7,
91 0x3ff5e76f15ad2148,
92 0x3ff605e1b976dc09,
93 0x3ff6247eb03a5585,
94 0x3ff6434634ccc320,
95 0x3ff6623882552225,
96 0x3ff68155d44ca973,
97 0x3ff6a09e667f3bcd,
98 0x3ff6c012750bdabf,
99 0x3ff6dfb23c651a2f,
100 0x3ff6ff7df9519484,
101 0x3ff71f75e8ec5f74,
102 0x3ff73f9a48a58174,
103 0x3ff75feb564267c9,
104 0x3ff780694fde5d3f,
105 0x3ff7a11473eb0187,
106 0x3ff7c1ed0130c132,
107 0x3ff7e2f336cf4e62,
108 0x3ff80427543e1a12,
109 0x3ff82589994cce13,
110 0x3ff8471a4623c7ad,
111 0x3ff868d99b4492ed,
112 0x3ff88ac7d98a6699,
113 0x3ff8ace5422aa0db,
114 0x3ff8cf3216b5448c,
115 0x3ff8f1ae99157736,
116 0x3ff9145b0b91ffc6,
117 0x3ff93737b0cdc5e5,
118 0x3ff95a44cbc8520f,
119 0x3ff97d829fde4e50,
120 0x3ff9a0f170ca07ba,
121 0x3ff9c49182a3f090,
122 0x3ff9e86319e32323,
123 0x3ffa0c667b5de565,
124 0x3ffa309bec4a2d33,
125 0x3ffa5503b23e255d,
126 0x3ffa799e1330b358,
127 0x3ffa9e6b5579fdbf,
128 0x3ffac36bbfd3f37a,
129 0x3ffae89f995ad3ad,
130 0x3ffb0e07298db666,
131 0x3ffb33a2b84f15fb,
132 0x3ffb59728de5593a,
133 0x3ffb7f76f2fb5e47,
134 0x3ffba5b030a1064a,
135 0x3ffbcc1e904bc1d2,
136 0x3ffbf2c25bd71e09,
137 0x3ffc199bdd85529c,
138 0x3ffc40ab5fffd07a,
139 0x3ffc67f12e57d14b,
140 0x3ffc8f6d9406e7b5,
141 0x3ffcb720dcef9069,
142 0x3ffcdf0b555dc3fa,
143 0x3ffd072d4a07897c,
144 0x3ffd2f87080d89f2,
145 0x3ffd5818dcfba487,
146 0x3ffd80e316c98398,
147 0x3ffda9e603db3285,
148 0x3ffdd321f301b460,
149 0x3ffdfc97337b9b5f,
150 0x3ffe264614f5a129,
151 0x3ffe502ee78b3ff6,
152 0x3ffe7a51fbc74c83,
153 0x3ffea4afa2a490da,
154 0x3ffecf482d8e67f1,
155 0x3ffefa1bee615a27,
156 0x3fff252b376bba97,
157 0x3fff50765b6e4540,
158 0x3fff7bfdad9cbe14,
159 0x3fffa7c1819e90d8,
160 0x3fffd3c22b8f71f1,
161];
162
163static ERFC_COEFFS: [[u64; 16]; 2] = [
164 [
165 0x3fec162355429b28,
166 0x400d99999999999a,
167 0x3fdda951cece2b85,
168 0xbff70ef6cff4bcc4,
169 0x4003d7f7b3d617de,
170 0xc009d0aa47537c51,
171 0x4009754ea9a3fcb1,
172 0xc0027a5453fcc015,
173 0x3ff1ef2e0531aeba,
174 0xbfceca090f5a1c06,
175 0xbfb7a3cd173a063c,
176 0x3fb30fa68a68fddd,
177 0x3f555ad9a326993a,
178 0xbf907e7b0bb39fbf,
179 0x3f52328706c0e950,
180 0x3f6d6aa0b7b19cfe,
181 ],
182 [
183 0x401137c8983f8516,
184 0x400799999999999a,
185 0x3fc05b53aa241333,
186 0xbfca3f53872bf870,
187 0x3fbde4c30742c9d5,
188 0xbfacb24bfa591986,
189 0x3f9666aec059ca5f,
190 0xbf7a61250eb26b0b,
191 0x3f52b28b7924b34d,
192 0x3f041b13a9d45013,
193 0xbf16dd5e8a273613,
194 0x3ef09ce8ea5e8da5,
195 0x3ed33923b4102981,
196 0xbec1dfd161e3f984,
197 0xbe8c87618fcae3b3,
198 0x3e8e8a6ffa0ba2c7,
199 ],
200];
201
202pub fn f_erfcf(x: f32) -> f32 {
206 let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff);
207 let axd = ax as f64;
208 let x2 = axd * axd;
209 let t = x.to_bits();
210 let at = t & 0x7fff_ffff;
211 let sgn = t >> 31;
212 let i: i64 = (at > 0x40051000) as i64;
213 if t > 0xc07547cau32 {
215 if t >= 0xff800000u32 {
217 if t == 0xff800000u32 {
219 return 2.0;
220 } return x + x; }
223 return black_box(2.0) - black_box(f32::from_bits(0x33000000)); }
225 if at >= 0x4120ddfcu32 {
229 if at >= 0x7f800000u32 {
231 if at == 0x7f800000u32 {
233 return 0.0;
234 } return x + x; }
237 return black_box(f32::from_bits(0x00000001)) * black_box(0.25);
239 }
240 if at <= 0x3db80000u32 {
241 if t == 0xb76c9f62u32 {
243 return black_box(f32::from_bits(0x3f800085)) + black_box(f32::from_bits(0x33000000)); }
246 if at <= 0x32e2dfc4u32 {
248 if at == 0 {
250 return 1.0;
251 }
252 static D: [f32; 2] = [f32::from_bits(0xb2800000), f32::from_bits(0x33000000)];
253 return 1.0 + D[sgn as usize];
254 }
255 const C: [u64; 5] = [
257 0x3ff20dd750429b6d,
258 0xbfd812746b03610b,
259 0x3fbce2f218831d2f,
260 0xbf9b82c609607dcb,
261 0x3f7553af09b8008e,
262 ];
263
264 let fw0 = f_fmla(x2, f64::from_bits(C[4]), f64::from_bits(C[3]));
265 let fw1 = f_fmla(x2, fw0, f64::from_bits(C[2]));
266 let fw2 = f_fmla(x2, fw1, f64::from_bits(C[1]));
267
268 let f0 = x as f64 * f_fmla(x2, fw2, f64::from_bits(C[0]));
269 return (1.0 - f0) as f32;
270 }
271
272 const ILN2: f64 = f64::from_bits(0x3ff71547652b82fe);
274 const LN2H: f64 = f64::from_bits(0x3f762e42fefa0000);
275 const LN2L: f64 = f64::from_bits(0x3d0cf79abd6f5dc8);
276
277 let jt = dd_fmla(x2, ILN2, -(1024. + f64::from_bits(0x3f70000000000000))).to_bits();
278 let j: i64 = ((jt << 12) as i64) >> 48;
279 let sf = ((j >> 7) as u64)
280 .wrapping_add(0x3ffu64 | (sgn as u64) << 11)
281 .wrapping_shl(52);
282
283 const CH: [u64; 4] = [
284 0xbfdffffffffff333,
285 0x3fc5555555556a14,
286 0xbfa55556666659b4,
287 0x3f81111074cc7b22,
288 ];
289 let d = f_fmla(LN2L, j as f64, f_fmla(LN2H, j as f64, x2));
290 let d2 = d * d;
291 let e0 = f64::from_bits(ERR0[(j & 127) as usize]);
292
293 let fw0 = f_fmla(d, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
294 let fw1 = f_fmla(d, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
295 let fw2 = f_fmla(d2, fw0, fw1);
296
297 let f = f_fmla(d2, fw2, d);
298
299 let ct = ERFC_COEFFS[i as usize];
300 let z = (axd - f64::from_bits(ct[0])) / (axd + f64::from_bits(ct[1]));
301 let z2 = z * z;
302 let z4 = z2 * z2;
303 let z8 = z4 * z4;
304 let c = &ct[3..];
305
306 let sw0 = f_fmla(z, f64::from_bits(c[1]), f64::from_bits(c[0]));
307 let sw1 = f_fmla(z, f64::from_bits(c[3]), f64::from_bits(c[2]));
308 let sw2 = f_fmla(z, f64::from_bits(c[5]), f64::from_bits(c[4]));
309 let sw3 = f_fmla(z, f64::from_bits(c[7]), f64::from_bits(c[6]));
310
311 let zw0 = f_fmla(z2, sw1, sw0);
312 let zw1 = f_fmla(z2, sw3, sw2);
313
314 let sw4 = f_fmla(z, f64::from_bits(c[9]), f64::from_bits(c[8]));
315 let sw5 = f_fmla(z, f64::from_bits(c[11]), f64::from_bits(c[10]));
316
317 let zw2 = f_fmla(z4, zw1, zw0);
318 let zw3 = f_fmla(z2, sw5, sw4);
319 let zw4 = f_fmla(z4, f64::from_bits(c[12]), zw3);
320
321 let mut s = f_fmla(z8, zw4, zw2);
322
323 s = f_fmla(z, s, f64::from_bits(ct[2]));
324
325 static OFF: [f64; 2] = [0., 2.];
326
327 let r = (f64::from_bits(sf) * f_fmla(-f, e0, e0)) * s;
328 let y = OFF[sgn as usize] + r;
329 y as f32
330}
331
332#[cfg(test)]
333mod tests {
334 use crate::f_erfcf;
335
336 #[test]
337 fn test_erfc() {
338 assert_eq!(f_erfcf(0.0), 1.0);
339 assert_eq!(f_erfcf(0.5), 0.47950011);
340 assert_eq!(f_erfcf(1.0), 0.1572992);
341 assert!(f_erfcf(f32::NAN).is_nan());
342 assert_eq!(f_erfcf(f32::INFINITY), 0.0);
343 assert_eq!(f_erfcf(f32::NEG_INFINITY), 2.0);
344 }
345}