1use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::err::erf_poly::{ERF_POLY, ERF_POLY_C2};
32use crate::floor::FloorFinite;
33const TWO_OVER_SQRT_PI: DoubleDouble = DoubleDouble::new(
35 f64::from_bits(0x3c71ae3a914fed80),
36 f64::from_bits(0x3ff20dd750429b6d),
37);
38
39pub(crate) struct Erf {
40 pub(crate) result: DoubleDouble,
41 pub(crate) err: f64,
42}
43
44#[cold]
46fn cr_erf_accurate_tiny(x: f64) -> DoubleDouble {
47 static P: [u64; 15] = [
48 0x3ff20dd750429b6d,
49 0x3c71ae3a914fed80,
50 0xbfd812746b0379e7,
51 0x3c6ee12e49ca96ba,
52 0x3fbce2f21a042be2,
53 0xbc52871bc0a0a0d0,
54 0xbf9b82ce31288b51,
55 0x3c21003accf1355c,
56 0x3f7565bcd0e6a53f,
57 0xbf4c02db40040cc3,
58 0x3f1f9a326fa3cf50,
59 0xbeef4d25e3c73ce9,
60 0x3ebb9eb332b31646,
61 0xbe864a4bd5eca4d7,
62 0x3e6c0acc2502e94e,
63 ];
64 let z2 = x * x;
65 let mut h = f64::from_bits(P[21 / 2 + 4]); for a in (12..=19).rev().step_by(2) {
67 h = dd_fmla(h, z2, f64::from_bits(P[(a / 2 + 4) as usize]))
68 }
69 let mut l = 0.;
70 for a in (8..=11).rev().step_by(2) {
71 let mut t = DoubleDouble::from_exact_mult(h, x);
72 t.lo = dd_fmla(l, x, t.lo);
73 let mut k = DoubleDouble::from_exact_mult(t.hi, x);
74 k.lo = dd_fmla(t.lo, x, k.lo);
75 let p = DoubleDouble::from_exact_add(f64::from_bits(P[(a / 2 + 4) as usize]), k.hi);
76 l = k.lo + p.lo;
77 h = p.hi;
78 }
79 for a in (1..=7).rev().step_by(2) {
80 let mut t = DoubleDouble::from_exact_mult(h, x);
81 t.lo = dd_fmla(l, x, t.lo);
82 let mut k = DoubleDouble::from_exact_mult(t.hi, x);
83 k.lo = dd_fmla(t.lo, x, k.lo);
84 let p = DoubleDouble::from_exact_add(f64::from_bits(P[a - 1]), k.hi);
85 l = k.lo + p.lo + f64::from_bits(P[a]);
86 h = p.hi;
87 }
88 let p = DoubleDouble::from_exact_mult(h, x);
90 l = dd_fmla(l, x, p.lo);
91 DoubleDouble::new(l, p.hi)
92}
93
94#[cold]
98#[inline(never)]
99pub(crate) fn erf_accurate(x: f64) -> DoubleDouble {
100 if x < 0.125
101 {
103 return cr_erf_accurate_tiny(x);
104 }
105
106 let v = (8.0 * x).floor_finite();
107 let i: u32 = (8.0 * x) as u32;
108 let z = (x - 0.0625) - 0.125 * v;
109 let p = ERF_POLY_C2[(i - 1) as usize];
111 let mut h = f64::from_bits(p[26]); for a in (11..=17).rev() {
113 h = dd_fmla(h, z, f64::from_bits(p[(8 + a) as usize])); }
115 let mut l: f64 = 0.;
116 for a in (8..=10).rev() {
117 let mut t = DoubleDouble::from_exact_mult(h, z);
118 t.lo = dd_fmla(l, z, t.lo);
119 let p = DoubleDouble::from_exact_add(f64::from_bits(p[(8 + a) as usize]), t.hi);
120 h = p.hi;
121 l = p.lo + t.lo;
122 }
123 for a in (0..=7).rev() {
124 let mut t = DoubleDouble::from_exact_mult(h, z);
125 t.lo = dd_fmla(l, z, t.lo);
126 let v = DoubleDouble::from_exact_add(f64::from_bits(p[(2 * a) as usize]), t.hi);
131 h = v.hi;
132 l = v.lo + t.lo + f64::from_bits(p[(2 * a + 1) as usize]);
133 }
134 DoubleDouble::new(l, h)
135}
136
137#[inline]
141pub(crate) fn erf_fast(x: f64) -> Erf {
142 if x < 0.0625
151 {
153 let z2 = DoubleDouble::from_exact_mult(x, x);
158 const C: [u64; 8] = [
159 0x3ff20dd750429b6d,
160 0x3c71ae3a7862d9c4,
161 0xbfd812746b0379e7,
162 0x3c6f1a64d72722a2,
163 0x3fbce2f21a042b7f,
164 0xbf9b82ce31189904,
165 0x3f7565bbf8a0fe0b,
166 0xbf4bf9f8d2c202e4,
167 ];
168 let z4 = z2.hi * z2.hi;
169 let c9 = dd_fmla(f64::from_bits(C[7]), z2.hi, f64::from_bits(C[6]));
170 let mut c5 = dd_fmla(f64::from_bits(C[5]), z2.hi, f64::from_bits(C[4]));
171 c5 = dd_fmla(c9, z4, c5);
172 let mut t = DoubleDouble::from_exact_mult(z2.hi, c5);
174 let mut v = DoubleDouble::from_exact_add(f64::from_bits(C[2]), t.hi);
175 v.lo += t.lo + f64::from_bits(C[3]);
176 t = DoubleDouble::from_exact_mult(z2.hi, v.hi);
178 let h_c = v.hi;
179 t.lo += dd_fmla(z2.hi, v.lo, f64::from_bits(C[1]));
180 v = DoubleDouble::from_exact_add(f64::from_bits(C[0]), t.hi);
181 v.lo += dd_fmla(z2.lo, h_c, t.lo);
182 v = DoubleDouble::quick_mult_f64(v, x);
183 return Erf {
184 result: v,
185 err: f64::from_bits(0x3ba7800000000000),
186 }; }
188
189 let v = (16.0 * x).floor_finite();
190 let i: u32 = (16.0 * x) as u32;
191 let z = (x - 0.03125) - 0.0625 * v;
199 let c = ERF_POLY[(i - 1) as usize];
201 let z2 = z * z;
202 let z4 = z2 * z2;
203 let c9 = dd_fmla(f64::from_bits(c[12]), z, f64::from_bits(c[11]));
205 let mut c7 = dd_fmla(f64::from_bits(c[10]), z, f64::from_bits(c[9]));
206 let c5 = dd_fmla(f64::from_bits(c[8]), z, f64::from_bits(c[7]));
207 let mut c3 = DoubleDouble::from_exact_add(f64::from_bits(c[5]), z * f64::from_bits(c[6]));
209 c7 = dd_fmla(c9, z2, c7);
210 let p = DoubleDouble::from_exact_add(c3.hi, c5 * z2);
212 c3.hi = p.hi;
213 c3.lo += p.lo;
214 let p = DoubleDouble::from_exact_add(c3.hi, c7 * z4);
216 c3.hi = p.hi;
217 c3.lo += p.lo;
218 let mut t = DoubleDouble::from_exact_mult(z, c3.hi);
220 let mut c2 = DoubleDouble::from_exact_add(f64::from_bits(c[4]), t.hi);
221 c2.lo += dd_fmla(z, c3.lo, t.lo);
222 t = DoubleDouble::from_exact_mult(z, c2.hi);
224 let mut v = DoubleDouble::from_exact_add(f64::from_bits(c[2]), t.hi);
225 v.lo += t.lo + dd_fmla(z, c2.lo, f64::from_bits(c[3]));
226 t = DoubleDouble::from_exact_mult(z, v.hi);
228 t.lo = dd_fmla(z, v.lo, t.lo);
229 v = DoubleDouble::from_exact_add(f64::from_bits(c[0]), t.hi);
230 v.lo += t.lo + f64::from_bits(c[1]);
231 Erf {
232 result: v,
233 err: f64::from_bits(0x3ba1100000000000),
234 } }
237
238pub fn f_erf(x: f64) -> f64 {
242 let z = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
243 let mut t = z.to_bits();
244 let ux = t;
245 if ux > 0x4017afb48dc96626
247 {
249 let os = f64::copysign(1.0, x);
250 const MASK: u64 = 0x7ff0000000000000u64;
251 if ux > MASK {
252 return x + x; }
254 if ux == MASK {
255 return os; }
257 return f_fmla(-f64::from_bits(0x3c90000000000000), os, os);
258 }
259
260 if z < f64::from_bits(0x3c20000000000000) {
262 if x == 0. {
264 return x;
265 }
266 let y = TWO_OVER_SQRT_PI.hi * x; let sx = x * f64::from_bits(0x4690000000000000);
271 let mut p = DoubleDouble::quick_mult_f64(TWO_OVER_SQRT_PI, sx);
272 p.lo += f_fmla(-y, f64::from_bits(0x4690000000000000), p.hi); let res = dyad_fmla(p.lo, f64::from_bits(0x3950000000000000), y);
275 return res;
276 }
277
278 let result = erf_fast(z);
279 let mut u = result.result.hi.to_bits();
280 let mut v = result.result.lo.to_bits();
281 t = x.to_bits();
282
283 const SIGN_MASK: u64 = 0x8000000000000000u64;
284 u ^= t & SIGN_MASK;
285 v ^= t & SIGN_MASK;
286
287 let left = f64::from_bits(u) + f_fmla(result.err, -f64::from_bits(u), f64::from_bits(v));
288 let right = f64::from_bits(u) + f_fmla(result.err, f64::from_bits(u), f64::from_bits(v));
289
290 if left == right {
291 return left;
292 }
293
294 let a_results = erf_accurate(z);
295
296 if x >= 0. {
297 a_results.to_f64()
298 } else {
299 (-a_results.hi) + (-a_results.lo)
300 }
301}
302
303#[cfg(test)]
304mod tests {
305 use super::*;
306
307 #[test]
308 fn test_erf() {
309 assert_eq!(f_erf(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009456563898732),
310 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010670589695636709);
311 assert_eq!(f_erf(0.), 0.);
312 assert_eq!(f_erf(1.), 0.8427007929497149);
313 assert_eq!(f_erf(0.49866735123), 0.5193279892991808);
314 assert_eq!(f_erf(-0.49866735123), -0.5193279892991808);
315 assert!(f_erf(f64::NAN).is_nan());
316 assert_eq!(f_erf(f64::INFINITY), 1.0);
317 assert_eq!(f_erf(f64::NEG_INFINITY), -1.0);
318 }
319}