pxfm/err/
erffc.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, f_fmla};
30use std::hint::black_box;
31
32static ERR0: [u64; 128] = [
33    0x3ff0000000000000,
34    0x3ff0163da9fb3335,
35    0x3ff02c9a3e778061,
36    0x3ff04315e86e7f85,
37    0x3ff059b0d3158574,
38    0x3ff0706b29ddf6de,
39    0x3ff0874518759bc8,
40    0x3ff09e3ecac6f383,
41    0x3ff0b5586cf9890f,
42    0x3ff0cc922b7247f7,
43    0x3ff0e3ec32d3d1a2,
44    0x3ff0fb66affed31b,
45    0x3ff11301d0125b51,
46    0x3ff12abdc06c31cc,
47    0x3ff1429aaea92de0,
48    0x3ff15a98c8a58e51,
49    0x3ff172b83c7d517b,
50    0x3ff18af9388c8dea,
51    0x3ff1a35beb6fcb75,
52    0x3ff1bbe084045cd4,
53    0x3ff1d4873168b9aa,
54    0x3ff1ed5022fcd91d,
55    0x3ff2063b88628cd6,
56    0x3ff21f49917ddc96,
57    0x3ff2387a6e756238,
58    0x3ff251ce4fb2a63f,
59    0x3ff26b4565e27cdd,
60    0x3ff284dfe1f56381,
61    0x3ff29e9df51fdee1,
62    0x3ff2b87fd0dad990,
63    0x3ff2d285a6e4030b,
64    0x3ff2ecafa93e2f56,
65    0x3ff306fe0a31b715,
66    0x3ff32170fc4cd831,
67    0x3ff33c08b26416ff,
68    0x3ff356c55f929ff1,
69    0x3ff371a7373aa9cb,
70    0x3ff38cae6d05d866,
71    0x3ff3a7db34e59ff7,
72    0x3ff3c32dc313a8e5,
73    0x3ff3dea64c123422,
74    0x3ff3fa4504ac801c,
75    0x3ff4160a21f72e2a,
76    0x3ff431f5d950a897,
77    0x3ff44e086061892d,
78    0x3ff46a41ed1d0057,
79    0x3ff486a2b5c13cd0,
80    0x3ff4a32af0d7d3de,
81    0x3ff4bfdad5362a27,
82    0x3ff4dcb299fddd0d,
83    0x3ff4f9b2769d2ca7,
84    0x3ff516daa2cf6642,
85    0x3ff5342b569d4f82,
86    0x3ff551a4ca5d920f,
87    0x3ff56f4736b527da,
88    0x3ff58d12d497c7fd,
89    0x3ff5ab07dd485429,
90    0x3ff5c9268a5946b7,
91    0x3ff5e76f15ad2148,
92    0x3ff605e1b976dc09,
93    0x3ff6247eb03a5585,
94    0x3ff6434634ccc320,
95    0x3ff6623882552225,
96    0x3ff68155d44ca973,
97    0x3ff6a09e667f3bcd,
98    0x3ff6c012750bdabf,
99    0x3ff6dfb23c651a2f,
100    0x3ff6ff7df9519484,
101    0x3ff71f75e8ec5f74,
102    0x3ff73f9a48a58174,
103    0x3ff75feb564267c9,
104    0x3ff780694fde5d3f,
105    0x3ff7a11473eb0187,
106    0x3ff7c1ed0130c132,
107    0x3ff7e2f336cf4e62,
108    0x3ff80427543e1a12,
109    0x3ff82589994cce13,
110    0x3ff8471a4623c7ad,
111    0x3ff868d99b4492ed,
112    0x3ff88ac7d98a6699,
113    0x3ff8ace5422aa0db,
114    0x3ff8cf3216b5448c,
115    0x3ff8f1ae99157736,
116    0x3ff9145b0b91ffc6,
117    0x3ff93737b0cdc5e5,
118    0x3ff95a44cbc8520f,
119    0x3ff97d829fde4e50,
120    0x3ff9a0f170ca07ba,
121    0x3ff9c49182a3f090,
122    0x3ff9e86319e32323,
123    0x3ffa0c667b5de565,
124    0x3ffa309bec4a2d33,
125    0x3ffa5503b23e255d,
126    0x3ffa799e1330b358,
127    0x3ffa9e6b5579fdbf,
128    0x3ffac36bbfd3f37a,
129    0x3ffae89f995ad3ad,
130    0x3ffb0e07298db666,
131    0x3ffb33a2b84f15fb,
132    0x3ffb59728de5593a,
133    0x3ffb7f76f2fb5e47,
134    0x3ffba5b030a1064a,
135    0x3ffbcc1e904bc1d2,
136    0x3ffbf2c25bd71e09,
137    0x3ffc199bdd85529c,
138    0x3ffc40ab5fffd07a,
139    0x3ffc67f12e57d14b,
140    0x3ffc8f6d9406e7b5,
141    0x3ffcb720dcef9069,
142    0x3ffcdf0b555dc3fa,
143    0x3ffd072d4a07897c,
144    0x3ffd2f87080d89f2,
145    0x3ffd5818dcfba487,
146    0x3ffd80e316c98398,
147    0x3ffda9e603db3285,
148    0x3ffdd321f301b460,
149    0x3ffdfc97337b9b5f,
150    0x3ffe264614f5a129,
151    0x3ffe502ee78b3ff6,
152    0x3ffe7a51fbc74c83,
153    0x3ffea4afa2a490da,
154    0x3ffecf482d8e67f1,
155    0x3ffefa1bee615a27,
156    0x3fff252b376bba97,
157    0x3fff50765b6e4540,
158    0x3fff7bfdad9cbe14,
159    0x3fffa7c1819e90d8,
160    0x3fffd3c22b8f71f1,
161];
162
163static ERFC_COEFFS: [[u64; 16]; 2] = [
164    [
165        0x3fec162355429b28,
166        0x400d99999999999a,
167        0x3fdda951cece2b85,
168        0xbff70ef6cff4bcc4,
169        0x4003d7f7b3d617de,
170        0xc009d0aa47537c51,
171        0x4009754ea9a3fcb1,
172        0xc0027a5453fcc015,
173        0x3ff1ef2e0531aeba,
174        0xbfceca090f5a1c06,
175        0xbfb7a3cd173a063c,
176        0x3fb30fa68a68fddd,
177        0x3f555ad9a326993a,
178        0xbf907e7b0bb39fbf,
179        0x3f52328706c0e950,
180        0x3f6d6aa0b7b19cfe,
181    ],
182    [
183        0x401137c8983f8516,
184        0x400799999999999a,
185        0x3fc05b53aa241333,
186        0xbfca3f53872bf870,
187        0x3fbde4c30742c9d5,
188        0xbfacb24bfa591986,
189        0x3f9666aec059ca5f,
190        0xbf7a61250eb26b0b,
191        0x3f52b28b7924b34d,
192        0x3f041b13a9d45013,
193        0xbf16dd5e8a273613,
194        0x3ef09ce8ea5e8da5,
195        0x3ed33923b4102981,
196        0xbec1dfd161e3f984,
197        0xbe8c87618fcae3b3,
198        0x3e8e8a6ffa0ba2c7,
199    ],
200];
201
202/// Complementary error function
203///
204/// Max ULP 0.5
205pub fn f_erfcf(x: f32) -> f32 {
206    let ax = f32::from_bits(x.to_bits() & 0x7fff_ffff);
207    let axd = ax as f64;
208    let x2 = axd * axd;
209    let t = x.to_bits();
210    let at = t & 0x7fff_ffff;
211    let sgn = t >> 31;
212    let i: i64 = (at > 0x40051000) as i64;
213    /* for x < -0x1.ea8f94p+1, erfc(x) rounds to 2 (to nearest) */
214    if t > 0xc07547cau32 {
215        // x < -0x1.ea8f94p+1
216        if t >= 0xff800000u32 {
217            // -Inf or NaN
218            if t == 0xff800000u32 {
219                return 2.0;
220            } // -Inf
221            return x + x; // NaN
222        }
223        return black_box(2.0) - black_box(f32::from_bits(0x33000000)); // rounds to 2 or nextbelow(2)
224    }
225    /* at is the absolute value of x
226    for x >= 0x1.41bbf8p+3, erfc(x) < 2^-150, thus rounds to 0 or to 2^-149
227    depending on the rounding mode */
228    if at >= 0x4120ddfcu32 {
229        // |x| >= 0x1.41bbf8p+3
230        if at >= 0x7f800000u32 {
231            // +Inf or NaN
232            if at == 0x7f800000u32 {
233                return 0.0;
234            } // +Inf
235            return x + x; // NaN
236        }
237        // 0x1p-149f * 0.25f rounds to 0 or 2^-149 depending on rounding
238        return black_box(f32::from_bits(0x00000001)) * black_box(0.25);
239    }
240    if at <= 0x3db80000u32 {
241        // |x| <= 0x1.7p-4
242        if t == 0xb76c9f62u32 {
243            // x = -0x1.d93ec4p-17
244            return black_box(f32::from_bits(0x3f800085)) + black_box(f32::from_bits(0x33000000)); // exceptional case
245        }
246        /* for |x| <= 0x1.c5bf88p-26. erfc(x) rounds to 1 (to nearest) */
247        if at <= 0x32e2dfc4u32 {
248            // |x| <= 0x1.c5bf88p-26
249            if at == 0 {
250                return 1.0;
251            }
252            static D: [f32; 2] = [f32::from_bits(0xb2800000), f32::from_bits(0x33000000)];
253            return 1.0 + D[sgn as usize];
254        }
255        /* around 0, erfc(x) behaves as 1 - (odd polynomial) */
256        const C: [u64; 5] = [
257            0x3ff20dd750429b6d,
258            0xbfd812746b03610b,
259            0x3fbce2f218831d2f,
260            0xbf9b82c609607dcb,
261            0x3f7553af09b8008e,
262        ];
263
264        let fw0 = f_fmla(x2, f64::from_bits(C[4]), f64::from_bits(C[3]));
265        let fw1 = f_fmla(x2, fw0, f64::from_bits(C[2]));
266        let fw2 = f_fmla(x2, fw1, f64::from_bits(C[1]));
267
268        let f0 = x as f64 * f_fmla(x2, fw2, f64::from_bits(C[0]));
269        return (1.0 - f0) as f32;
270    }
271
272    /* now -0x1.ea8f94p+1 <= x <= 0x1.41bbf8p+3, with |x| > 0x1.7p-4 */
273    const ILN2: f64 = f64::from_bits(0x3ff71547652b82fe);
274    const LN2H: f64 = f64::from_bits(0x3f762e42fefa0000);
275    const LN2L: f64 = f64::from_bits(0x3d0cf79abd6f5dc8);
276
277    let jt = dd_fmla(x2, ILN2, -(1024. + f64::from_bits(0x3f70000000000000))).to_bits();
278    let j: i64 = ((jt << 12) as i64) >> 48;
279    let sf = ((j >> 7) as u64)
280        .wrapping_add(0x3ffu64 | (sgn as u64) << 11)
281        .wrapping_shl(52);
282
283    const CH: [u64; 4] = [
284        0xbfdffffffffff333,
285        0x3fc5555555556a14,
286        0xbfa55556666659b4,
287        0x3f81111074cc7b22,
288    ];
289    let d = f_fmla(LN2L, j as f64, f_fmla(LN2H, j as f64, x2));
290    let d2 = d * d;
291    let e0 = f64::from_bits(ERR0[(j & 127) as usize]);
292
293    let fw0 = f_fmla(d, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
294    let fw1 = f_fmla(d, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
295    let fw2 = f_fmla(d2, fw0, fw1);
296
297    let f = f_fmla(d2, fw2, d);
298
299    let ct = ERFC_COEFFS[i as usize];
300    let z = (axd - f64::from_bits(ct[0])) / (axd + f64::from_bits(ct[1]));
301    let z2 = z * z;
302    let z4 = z2 * z2;
303    let z8 = z4 * z4;
304    let c = &ct[3..];
305
306    let sw0 = f_fmla(z, f64::from_bits(c[1]), f64::from_bits(c[0]));
307    let sw1 = f_fmla(z, f64::from_bits(c[3]), f64::from_bits(c[2]));
308    let sw2 = f_fmla(z, f64::from_bits(c[5]), f64::from_bits(c[4]));
309    let sw3 = f_fmla(z, f64::from_bits(c[7]), f64::from_bits(c[6]));
310
311    let zw0 = f_fmla(z2, sw1, sw0);
312    let zw1 = f_fmla(z2, sw3, sw2);
313
314    let sw4 = f_fmla(z, f64::from_bits(c[9]), f64::from_bits(c[8]));
315    let sw5 = f_fmla(z, f64::from_bits(c[11]), f64::from_bits(c[10]));
316
317    let zw2 = f_fmla(z4, zw1, zw0);
318    let zw3 = f_fmla(z2, sw5, sw4);
319    let zw4 = f_fmla(z4, f64::from_bits(c[12]), zw3);
320
321    let mut s = f_fmla(z8, zw4, zw2);
322
323    s = f_fmla(z, s, f64::from_bits(ct[2]));
324
325    static OFF: [f64; 2] = [0., 2.];
326
327    let r = (f64::from_bits(sf) * f_fmla(-f, e0, e0)) * s;
328    let y = OFF[sgn as usize] + r;
329    y as f32
330}
331
332#[cfg(test)]
333mod tests {
334    use crate::f_erfcf;
335
336    #[test]
337    fn test_erfc() {
338        assert_eq!(f_erfcf(0.0), 1.0);
339        assert_eq!(f_erfcf(0.5), 0.47950011);
340        assert_eq!(f_erfcf(1.0), 0.1572992);
341        assert!(f_erfcf(f32::NAN).is_nan());
342        assert_eq!(f_erfcf(f32::INFINITY), 0.0);
343        assert_eq!(f_erfcf(f32::NEG_INFINITY), 2.0);
344    }
345}