pxfm/err/
rerf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 8/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30use crate::double_double::DoubleDouble;
31use crate::err::rerf_poly::RERF_HARD;
32use crate::polyeval::f_polyeval7;
33
34#[cold]
35#[inline(never)]
36fn rerf_poly_tiny_hard(x: f64, z2: DoubleDouble) -> f64 {
37    // Polynomial for x/erf(x)
38    // Generated by Sollya.
39    // d = [0, 1/16];
40    // f = x/erf(x);
41    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107...|], d);
42    // See ./notes/r_erf_tiny_hard.sollya
43    const C: [(u64, u64); 10] = [
44        (0xbc8618f13eb7ca89, 0x3fec5bf891b4ef6b),
45        (0xbc6d7696fe4a7cd0, 0x3fd2e7fb0bcdf4f2),
46        (0xbc0cb8b926064434, 0x3f842aa561ecc102),
47        (0x3c1cd94c2f3e6f09, 0xbf75207c7ef80727),
48        (0xbbb35c4effe3c87c, 0x3f2db4a8d7c32472),
49        (0x3bbf1d1edd1e109a, 0x3f20faa7a99a4d3d),
50        (0xbb9e05d21f4e1755, 0xbef3adb84631c39c),
51        (0x3b6ee5dc31565280, 0xbec366647cacdcc9),
52        (0x3b3698f8162c5fac, 0x3eaabb9db9f3b048),
53        (0xbb026f5401fce891, 0xbe66cd40349520b6),
54    ];
55    let mut p = DoubleDouble::mul_add(
56        z2,
57        DoubleDouble::from_bit_pair(C[9]),
58        DoubleDouble::from_bit_pair(C[8]),
59    );
60    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[7]));
61    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[6]));
62    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[5]));
63    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[4]));
64    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[3]));
65    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[2]));
66    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[1]));
67    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(C[0]));
68    p = DoubleDouble::from_exact_add(p.hi, p.lo);
69    let z = DoubleDouble::div_dd_f64(p, x);
70    z.to_f64()
71}
72
73#[inline]
74fn rerf_poly_tiny(z: f64, x: f64) -> f64 {
75    let z2 = DoubleDouble::from_exact_mult(z, z);
76
77    // Polynomial for x/erf(x)
78    // Generated by Sollya.
79    // d = [0, 1/16];
80    // f = x/erf(x);
81    // Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18|], [|107, 107, 107, D...|], d);
82    // See ./notes/r_erf_tiny.sollya
83    let p = f_polyeval7(
84        z2.hi,
85        f64::from_bits(0xbf75207c7ef80727),
86        f64::from_bits(0x3f2db4a8d7c36a03),
87        f64::from_bits(0x3f20faa7a8db7f27),
88        f64::from_bits(0xbef3adae94983bb2),
89        f64::from_bits(0xbec3b05fe5c49f32),
90        f64::from_bits(0x3ed67902690892be),
91        f64::from_bits(0xbf3090033375e5ee),
92    );
93    let mut r = DoubleDouble::quick_mul_f64_add(
94        z2,
95        p,
96        DoubleDouble::from_bit_pair((0xbc0cb29fd910c494, 0x3f842aa561ecc102)),
97    );
98    r = DoubleDouble::quick_mul_add(
99        z2,
100        r,
101        DoubleDouble::from_bit_pair((0xbc6d7696ff4f712a, 0x3fd2e7fb0bcdf4f2)),
102    );
103    r = DoubleDouble::quick_mul_add(
104        z2,
105        r,
106        DoubleDouble::from_bit_pair((0xbc8618f13eb7ca11, 0x3fec5bf891b4ef6b)),
107    );
108    r = DoubleDouble::from_exact_add(r.hi, r.lo);
109    r = DoubleDouble::div_dd_f64(r, x);
110    let err = f_fmla(
111        r.hi,
112        f64::from_bits(0x3c10000000000000), // 2^-62
113        f64::from_bits(0x3b90000000000000), // 2^-70
114    );
115    let ub = r.hi + (r.lo + err);
116    let lb = r.hi + (r.lo - err);
117    if ub == lb {
118        return r.to_f64();
119    }
120    rerf_poly_tiny_hard(x, z2)
121}
122
123#[inline]
124fn rerf_poly_hard(x: f64, z2: DoubleDouble, idx: usize) -> f64 {
125    let c = &RERF_HARD[idx];
126    let mut p = DoubleDouble::mul_add(
127        z2,
128        DoubleDouble::from_bit_pair(c[10]),
129        DoubleDouble::from_bit_pair(c[9]),
130    );
131    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[8]));
132    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[7]));
133    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[6]));
134    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[5]));
135    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[4]));
136    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[3]));
137    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[2]));
138    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[1]));
139    p = DoubleDouble::mul_add(z2, p, DoubleDouble::from_bit_pair(c[0]));
140    p = DoubleDouble::from_exact_add(p.hi, p.lo);
141    let z = DoubleDouble::div_dd_f64(p, x);
142    z.to_f64()
143}
144
145/// Computes 1/erf(x)
146///
147/// Max ulp 0.5001
148pub fn f_rerf(x: f64) -> f64 {
149    let z = f64::from_bits(x.to_bits() & 0x7fff_ffff_ffff_ffff);
150    let t = z.to_bits();
151    let ux = t;
152    /* 1/erf(x) rounds to +/-1 for RNDN for |x| > 0x4017afb48dc96626 */
153    if ux > 0x4017afb48dc96626
154    // |x| > 0x4017afb48dc96626
155    {
156        let os = f64::copysign(1.0, x);
157        const MASK: u64 = 0x7ff0000000000000u64;
158        if ux > MASK {
159            return x + x; /* NaN */
160        }
161        if ux == MASK {
162            return os; /* +/-Inf */
163        }
164        return f_fmla(-f64::from_bits(0x3c90000000000000), os, os);
165    }
166
167    /* now |x| <= 0x4017afb48dc96626 */
168    if z < f64::from_bits(0x3c20000000000000) {
169        // |x| < 0.0000000000000000004336808689942018
170        /* for x=-0 the code below returns +0 which is wrong */
171        if x == 0. {
172            return if x.is_sign_negative() {
173                f64::NEG_INFINITY
174            } else {
175                f64::INFINITY
176            };
177        }
178
179        if z.to_bits() <= 0x38b7f12369dedu64 {
180            return if x.is_sign_negative() {
181                f64::NEG_INFINITY
182            } else {
183                f64::INFINITY
184            };
185        }
186
187        /* double-double approximation of 2/sqrt(pi) to nearest */
188        const SQRT_PI_OVER_2: DoubleDouble = DoubleDouble::new(
189            f64::from_bits(0xbc8618f13eb7ca89),
190            f64::from_bits(0x3fec5bf891b4ef6b),
191        );
192
193        /* tiny x is Taylor Series: 1/erf(x) ~ sqrt(pi)/(2 * x) + O(x^3), where the ratio of the O(x^3)
194        term to the main term is in x^2/3, thus less than 2^-123 */
195        /* scale x by 2^106 to get out the subnormal range */
196        let sx = x * f64::from_bits(0x4690000000000000);
197        let mut prod = DoubleDouble::div_dd_f64(SQRT_PI_OVER_2, sx);
198        // scale back by 2^106, since we're performed the division
199        prod = DoubleDouble::quick_mult_f64(prod, f64::from_bits(0x4690000000000000));
200        return prod.to_f64();
201    }
202
203    if z.to_bits() < 0x3fb0000000000000u64 {
204        return rerf_poly_tiny(z, x);
205    }
206
207    const SIXTEEN: u64 = 4 << 52;
208    let idx =
209        unsafe { f64::from_bits(z.to_bits().wrapping_add(SIXTEEN)).to_int_unchecked::<usize>() };
210    let z2 = DoubleDouble::from_exact_mult(z, z);
211    rerf_poly_hard(x, z2, idx)
212}
213
214#[cfg(test)]
215mod tests {
216    use super::*;
217
218    #[test]
219    fn test_erf() {
220        assert_eq!(f_rerf(65.), 1.0);
221        assert_eq!(f_rerf(3.), 1.0000220909849995);
222        assert_eq!(f_rerf(-3.), -1.0000220909849995);
223        assert_eq!(f_rerf(-0.03723630312089732), -23.811078627277197);
224        assert_eq!(
225            f_rerf(0.0000000000000000002336808689942018),
226            3.7924667486354975e18
227        );
228        assert_eq!(f_rerf(2.000225067138672), 1.004695025872889);
229        assert_eq!(f_rerf(0.), f64::INFINITY);
230        assert_eq!(f_rerf(-0.), f64::NEG_INFINITY);
231        assert!(f_rerf(f64::NAN).is_nan());
232    }
233}