// pxfm/err/erf.rs

/*
 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
 * //
 * // Redistribution and use in source and binary forms, with or without modification,
 * // are permitted provided that the following conditions are met:
 * //
 * // 1.  Redistributions of source code must retain the above copyright notice, this
 * // list of conditions and the following disclaimer.
 * //
 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
 * // this list of conditions and the following disclaimer in the documentation
 * // and/or other materials provided with the distribution.
 * //
 * // 3.  Neither the name of the copyright holder nor the names of its
 * // contributors may be used to endorse or promote products derived from
 * // this software without specific prior written permission.
 * //
 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
29use crate::common::{dd_fmla, dyad_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::err::erf_poly::{ERF_POLY, ERF_POLY_C2};
32use crate::floor::FloorFinite;
/* double-double approximation of 2/sqrt(pi) to nearest
   (constructed as DoubleDouble::new(lo, hi): the low word 0x3c71ae3a914fed80
   is the correction term, the high word 0x3ff20dd750429b6d ~ 1.1283791670955126) */
const TWO_OVER_SQRT_PI: DoubleDouble = DoubleDouble::new(
    f64::from_bits(0x3c71ae3a914fed80),
    f64::from_bits(0x3ff20dd750429b6d),
);
38
/// Result of the fast erf evaluation path: a double-double value together
/// with a bound on its relative error, used by the caller to decide whether
/// the fast result rounds unambiguously or the accurate path is needed.
pub(crate) struct Erf {
    // Approximation of erf(x) as an unevaluated sum hi + lo.
    pub(crate) result: DoubleDouble,
    // Maximal relative error bound: |(hi + lo)/erf(x) - 1| < err * |hi + lo|.
    pub(crate) err: f64,
}
43
/* for |z| < 1/8, assuming z >= 2^-61, thus no underflow can occur */
#[cold]
fn cr_erf_accurate_tiny(x: f64) -> DoubleDouble {
    /* Coefficients of an odd polynomial for erf on [0, 1/8], as IEEE-754 bit
       patterns. Degrees 1, 3, 5, 7 have double-double coefficients stored as
       adjacent (high, low) pairs P[0..=7]; degrees 9, 11, ..., 21 have plain
       double coefficients at index a/2 + 4 for odd degree a (so P[8..=14]). */
    static P: [u64; 15] = [
        0x3ff20dd750429b6d,
        0x3c71ae3a914fed80,
        0xbfd812746b0379e7,
        0x3c6ee12e49ca96ba,
        0x3fbce2f21a042be2,
        0xbc52871bc0a0a0d0,
        0xbf9b82ce31288b51,
        0x3c21003accf1355c,
        0x3f7565bcd0e6a53f,
        0xbf4c02db40040cc3,
        0x3f1f9a326fa3cf50,
        0xbeef4d25e3c73ce9,
        0x3ebb9eb332b31646,
        0xbe864a4bd5eca4d7,
        0x3e6c0acc2502e94e,
    ];
    // The polynomial is odd, so the high-order part is a Horner scheme in x^2.
    let z2 = x * x;
    let mut h = f64::from_bits(P[21 / 2 + 4]); /* degree 21 */
    // Plain double-precision Horner for degrees 19, 17, 15, 13
    // (.rev().step_by(2) yields a = 19, 17, 15, 13).
    for a in (12..=19).rev().step_by(2) {
        h = dd_fmla(h, z2, f64::from_bits(P[(a / 2 + 4) as usize]))
    }
    let mut l = 0.;
    // Double-double Horner for degrees 11 and 9: multiply (h, l) by x twice
    // (i.e. by x^2) with exact products, then add the double coefficient.
    for a in (8..=11).rev().step_by(2) {
        let mut t = DoubleDouble::from_exact_mult(h, x);
        t.lo = dd_fmla(l, x, t.lo); // fold the old low part into the product
        let mut k = DoubleDouble::from_exact_mult(t.hi, x);
        k.lo = dd_fmla(t.lo, x, k.lo);
        let p = DoubleDouble::from_exact_add(f64::from_bits(P[(a / 2 + 4) as usize]), k.hi);
        l = k.lo + p.lo;
        h = p.hi;
    }
    // Double-double Horner for degrees 7, 5, 3, 1, where each coefficient is
    // a double-double pair: P[a - 1] is the high word, P[a] the low word.
    for a in (1..=7).rev().step_by(2) {
        let mut t = DoubleDouble::from_exact_mult(h, x);
        t.lo = dd_fmla(l, x, t.lo);
        let mut k = DoubleDouble::from_exact_mult(t.hi, x);
        k.lo = dd_fmla(t.lo, x, k.lo);
        let p = DoubleDouble::from_exact_add(f64::from_bits(P[a - 1]), k.hi);
        l = k.lo + p.lo + f64::from_bits(P[a]);
        h = p.hi;
    }
    /* multiply by z */
    let p = DoubleDouble::from_exact_mult(h, x);
    l = dd_fmla(l, x, p.lo);
    // DoubleDouble::new takes (lo, hi), matching TWO_OVER_SQRT_PI above.
    DoubleDouble::new(l, p.hi)
}
93
/* Assuming 0 <= z <= 0x1.7afb48dc96626p+2, put in h+l an accurate
approximation of erf(z).
Assumes z >= 2^-61, thus no underflow can occur. */
#[cold]
#[inline(never)]
pub(crate) fn erf_accurate(x: f64) -> DoubleDouble {
    if x < 0.125
    /* z < 1/8 */
    {
        return cr_erf_accurate_tiny(x);
    }

    // Split [1/8, 0x1.7afb48dc96626p+2] into intervals of width 1/8 and use a
    // per-interval degree-18 polynomial from ERF_POLY_C2, evaluated at the
    // offset from the interval midpoint i/8 + 1/16.
    let v = (8.0 * x).floor_finite();
    let i: u32 = (8.0 * x) as u32; // same value as v, as an index (x > 0 here)
    let z = (x - 0.0625) - 0.125 * v; // x minus the midpoint v/8 + 1/16
    /* now |z| <= 1/16 */
    let p = ERF_POLY_C2[(i - 1) as usize]; // i >= 1 since x >= 1/8
    let mut h = f64::from_bits(p[26]); /* degree-18 */
    // Plain double Horner for degrees 17..=11; the coefficient of degree a
    // is stored at p[8 + a].
    for a in (11..=17).rev() {
        h = dd_fmla(h, z, f64::from_bits(p[(8 + a) as usize])); /* degree j */
    }
    let mut l: f64 = 0.;
    // Double-double Horner for degrees 10..=8: the low part l is folded into
    // the exact product before adding the (double) coefficient.
    // NOTE: the inner `let p` shadows the coefficient array only within each
    // iteration body; the array is still `p` on the next iteration.
    for a in (8..=10).rev() {
        let mut t = DoubleDouble::from_exact_mult(h, z);
        t.lo = dd_fmla(l, z, t.lo);
        let p = DoubleDouble::from_exact_add(f64::from_bits(p[(8 + a) as usize]), t.hi);
        h = p.hi;
        l = p.lo + t.lo;
    }
    // Double-double Horner for degrees 7..=0, where each coefficient is a
    // double-double pair: p[2a] is the high word, p[2a + 1] the low word.
    for a in (0..=7).rev() {
        let mut t = DoubleDouble::from_exact_mult(h, z);
        t.lo = dd_fmla(l, z, t.lo);
        /* add p[2*j] + p[2*j+1] to th + tl: we use two_sum() instead of
        fast_two_sum because for example for i=3, the coefficient of
        degree 7 is tiny (0x1.060b78c935b8ep-13) with respect to that
        of degree 8 (0x1.678b51a9c4b0ap-7) */
        let v = DoubleDouble::from_exact_add(f64::from_bits(p[(2 * a) as usize]), t.hi);
        h = v.hi;
        l = v.lo + t.lo + f64::from_bits(p[(2 * a + 1) as usize]);
    }
    // DoubleDouble::new takes (lo, hi).
    DoubleDouble::new(l, h)
}
136
/* Assuming 0 <= z <= 5.9215871957945065, put in h+l an approximation
of erf(z). Return err the maximal relative error:
|(h + l)/erf(z) - 1| < err*|h+l| */
#[inline]
pub(crate) fn erf_fast(x: f64) -> Erf {
    /* we split [0,5.9215871957945065] into intervals i/16 <= z < (i+1)/16,
       and for each interval, we use a minimax polynomial:
       * for i=0 (0 <= z < 1/16) we use a polynomial evaluated at zero,
         since if we evaluate in the middle 1/32, we will get bad accuracy
         for tiny z, and moreover z-1/32 might not be exact
       * for 1 <= i <= 94, we use a polynomial evaluated in the middle of
         the interval, namely i/16+1/32
    */
    if x < 0.0625
    /* z < 1/16 */
    {
        /* the following is a degree-11 minimax polynomial for erf(x) on [0,1/16]
        generated by Sollya, with double-double coefficients for degree 1 and 3,
        and double coefficients for degrees 5 to 11 (file erf0.sollya).
        The maximal relative error is 2^-68.935. */
        let z2 = DoubleDouble::from_exact_mult(x, x);
        // C[0]+C[1]: degree-1 (double-double); C[2]+C[3]: degree-3
        // (double-double); C[4..=7]: degrees 5, 7, 9, 11 (double).
        const C: [u64; 8] = [
            0x3ff20dd750429b6d,
            0x3c71ae3a7862d9c4,
            0xbfd812746b0379e7,
            0x3c6f1a64d72722a2,
            0x3fbce2f21a042b7f,
            0xbf9b82ce31189904,
            0x3f7565bbf8a0fe0b,
            0xbf4bf9f8d2c202e4,
        ];
        // The polynomial is odd; evaluate the high part in x^2 / x^4
        // (Estrin-like pairing of the degree-5..11 double coefficients).
        let z4 = z2.hi * z2.hi;
        let c9 = dd_fmla(f64::from_bits(C[7]), z2.hi, f64::from_bits(C[6]));
        let mut c5 = dd_fmla(f64::from_bits(C[5]), z2.hi, f64::from_bits(C[4]));
        c5 = dd_fmla(c9, z4, c5);
        /* compute c0[2] + c0[3] + z2h*c5 */
        let mut t = DoubleDouble::from_exact_mult(z2.hi, c5);
        let mut v = DoubleDouble::from_exact_add(f64::from_bits(C[2]), t.hi);
        v.lo += t.lo + f64::from_bits(C[3]);
        /* compute c0[0] + c0[1] + (z2h + z2l)*(h + l) */
        t = DoubleDouble::from_exact_mult(z2.hi, v.hi);
        let h_c = v.hi; // saved so z2.lo * h_c can be folded in below
        t.lo += dd_fmla(z2.hi, v.lo, f64::from_bits(C[1]));
        v = DoubleDouble::from_exact_add(f64::from_bits(C[0]), t.hi);
        v.lo += dd_fmla(z2.lo, h_c, t.lo);
        // Final multiply by x (the polynomial is odd, so erf(x) ~ x * P(x^2)).
        v = DoubleDouble::quick_mult_f64(v, x);
        return Erf {
            result: v,
            err: f64::from_bits(0x3ba7800000000000),
        }; /* err < 2.48658249618372e-21, cf Analyze0() */
    }

    let v = (16.0 * x).floor_finite();
    let i: u32 = (16.0 * x) as u32; // same value as v, as an index (x > 0 here)
    /* i/16 <= z < (i+1)/16 */
    /* For 0.0625 0 <= z <= 0x1.7afb48dc96626p+2, z - 0.03125 is exact:
    (1) either z - 0.03125 is in the same binade as z, then 0.03125 is
        an integer multiple of ulp(z), so is z - 0.03125
    (2) if z - 0.03125 is in a smaller binade, both z and 0.03125 are
        integer multiple of the ulp() of that smaller binade.
    Also, subtracting 0.0625 * v is exact. */
    let z = (x - 0.03125) - 0.0625 * v;
    /* now |z| <= 1/32 */
    let c = ERF_POLY[(i - 1) as usize]; // i >= 1 since x >= 1/16
    let z2 = z * z;
    let z4 = z2 * z2;
    /* the degree-10 coefficient is c[12] */
    // Estrin-like evaluation of the high-order double coefficients c[7..=12]
    // (degrees 5..10), paired into odd/even sub-polynomials.
    let c9 = dd_fmla(f64::from_bits(c[12]), z, f64::from_bits(c[11]));
    let mut c7 = dd_fmla(f64::from_bits(c[10]), z, f64::from_bits(c[9]));
    let c5 = dd_fmla(f64::from_bits(c[8]), z, f64::from_bits(c[7]));
    /* c3h, c3l <- c[5] + z*c[6] */
    let mut c3 = DoubleDouble::from_exact_add(f64::from_bits(c[5]), z * f64::from_bits(c[6]));
    c7 = dd_fmla(c9, z2, c7);
    /* c3h, c3l <- c3h, c3l + c5*z2 */
    let p = DoubleDouble::from_exact_add(c3.hi, c5 * z2);
    c3.hi = p.hi;
    c3.lo += p.lo;
    /* c3h, c3l <- c3h, c3l + c7*z4 */
    let p = DoubleDouble::from_exact_add(c3.hi, c7 * z4);
    c3.hi = p.hi;
    c3.lo += p.lo;
    /* c2h, c2l <- c[4] + z*(c3h + c3l) */
    let mut t = DoubleDouble::from_exact_mult(z, c3.hi);
    let mut c2 = DoubleDouble::from_exact_add(f64::from_bits(c[4]), t.hi);
    c2.lo += dd_fmla(z, c3.lo, t.lo);
    /* compute c[2] + c[3] + z*(c2h + c2l) */
    t = DoubleDouble::from_exact_mult(z, c2.hi);
    let mut v = DoubleDouble::from_exact_add(f64::from_bits(c[2]), t.hi);
    v.lo += t.lo + dd_fmla(z, c2.lo, f64::from_bits(c[3]));
    /* compute c[0] + c[1] + z*(h + l) */
    t = DoubleDouble::from_exact_mult(z, v.hi);
    t.lo = dd_fmla(z, v.lo, t.lo);
    v = DoubleDouble::from_exact_add(f64::from_bits(c[0]), t.hi);
    v.lo += t.lo + f64::from_bits(c[1]);
    Erf {
        result: v,
        err: f64::from_bits(0x3ba1100000000000),
    } /* err < 1.80414390200020e-21, cf analyze_p(1)
    (larger values of i yield smaller error bounds) */
}
237
/// Error function
///
/// Computes erf(x) for an `f64` input. Special cases: NaN propagates,
/// +/-Inf map to +/-1, and +/-0 map to +/-0. Uses a fast double-double
/// path first, falling back to the slower accurate path only when the
/// fast result's error interval straddles a rounding boundary.
///
/// Max ULP 0.5
pub fn f_erf(x: f64) -> f64 {
    // z = |x|: clear the sign bit; the sign is reapplied at the end
    // (erf is odd).
    let z = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
    let mut t = z.to_bits();
    let ux = t; // bit pattern of |x|
    /* erf(x) rounds to +/-1 for RNDN for |x| > 0x4017afb48dc96626 */
    if ux > 0x4017afb48dc96626
    // |x| > 0x4017afb48dc96626
    {
        let os = f64::copysign(1.0, x);
        const MASK: u64 = 0x7ff0000000000000u64;
        if ux > MASK {
            return x + x; /* NaN */
        }
        if ux == MASK {
            return os; /* +/-Inf */
        }
        // Saturated region: return +/-1 nudged toward zero by one ulp-scale
        // fma (os - 2^-54 * os) so directed rounding modes behave correctly.
        return f_fmla(-f64::from_bits(0x3c90000000000000), os, os);
    }

    /* now |x| <= 0x4017afb48dc96626 */
    // |x| < 2^-61: handle separately to avoid the polynomial paths
    // (which assume z >= 2^-61).
    if z < f64::from_bits(0x3c20000000000000) {
        /* for x=-0 the code below returns +0 which is wrong */
        if x == 0. {
            return x;
        }
        /* tiny x: erf(x) ~ 2/sqrt(pi) * x + O(x^3), where the ratio of the O(x^3)
        term to the main term is in x^2/3, thus less than 2^-123 */
        let y = TWO_OVER_SQRT_PI.hi * x; /* tentative result */
        /* scale x by 2^106 to get out the subnormal range */
        let sx = x * f64::from_bits(0x4690000000000000);
        let mut p = DoubleDouble::quick_mult_f64(TWO_OVER_SQRT_PI, sx);
        /* now compute the residual h + l - y */
        p.lo += f_fmla(-y, f64::from_bits(0x4690000000000000), p.hi); /* h-y*2^106 is exact since h and y are very close */
        // Scale the residual back down by 2^-106 and fold it into y.
        let res = dyad_fmla(p.lo, f64::from_bits(0x3950000000000000), y);
        return res;
    }

    let result = erf_fast(z);
    let mut u = result.result.hi.to_bits();
    let mut v = result.result.lo.to_bits();
    t = x.to_bits();

    // Reapply the sign of x to both halves of the double-double result
    // (erf(-x) = -erf(x)).
    const SIGN_MASK: u64 = 0x8000000000000000u64;
    u ^= t & SIGN_MASK;
    v ^= t & SIGN_MASK;

    // Ziv rounding test: perturb the result by +/- err*|hi|. If both ends of
    // the error interval round to the same double, the fast result is safe.
    let left = f64::from_bits(u) + f_fmla(result.err, -f64::from_bits(u), f64::from_bits(v));
    let right = f64::from_bits(u) + f_fmla(result.err, f64::from_bits(u), f64::from_bits(v));

    if left == right {
        return left;
    }

    // Rounding was ambiguous: recompute with the accurate (cold) path.
    let a_results = erf_accurate(z);

    if x >= 0. {
        a_results.to_f64()
    } else {
        // Negate both halves before summing to preserve the odd symmetry.
        (-a_results.hi) + (-a_results.lo)
    }
}
302
#[cfg(test)]
mod tests {
    use super::*;

    // Covers the subnormal/tiny path, zero, generic positive/negative inputs
    // (odd symmetry), and the NaN/Inf special cases.
    #[test]
    fn test_erf() {
        assert_eq!(f_erf(0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000009456563898732),
                   0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010670589695636709);
        assert_eq!(f_erf(0.), 0.);
        assert_eq!(f_erf(1.), 0.8427007929497149);
        assert_eq!(f_erf(0.49866735123), 0.5193279892991808);
        assert_eq!(f_erf(-0.49866735123), -0.5193279892991808);
        assert!(f_erf(f64::NAN).is_nan());
        assert_eq!(f_erf(f64::INFINITY), 1.0);
        assert_eq!(f_erf(f64::NEG_INFINITY), -1.0);
    }
}
319}