pxfm/err/
rerff.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::f_fmla;
30
31// Polynomials approximating x/erf(x) on ( k/8, (k + 1)/8 ) generated by Sollya and SageMath
32// ```text
33// def build_sollya_script(idx):
34//     return f"""
35// d = [{idx}/8, {idx + 1}/8];
36//
37// f = x/erf(x);
38// Q = fpminimax(f, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|], d);
39//
40// for i from 0 to degree(Q) by 2 do {{
41//     write(coeff(Q, i)) >> "coefficients.txt";
42//     write("\\n") >> "coefficients.txt";
43// }};
44// """
45//
46// def load_coefficients(filename):
47//     with open(filename, "r") as f:
48//         return [RealField(500)(line.strip()) for line in f if line.strip()]
49//
50// def call_sollya_on_interval(idx):
51//     sollya_script = build_sollya_script(idx)
52//     with open("tmp_interval.sollya", "w") as f:
53//         f.write(sollya_script)
54//     import subprocess
55//     if os.path.exists("coefficients.txt"):
56//         os.remove("coefficients.txt")
57//     try:
58//         result = subprocess.run(
59//             ["sollya", "tmp_interval.sollya"],
60//             check=True,
61//             capture_output=True,
62//             text=True
63//         )
64//     except subprocess.CalledProcessError as e:
65//         return
66//
67// def print_coeffs(poly):
68//     print("[")
69//     for i in range(len(poly)):
70//         coeff = poly[i]
71//         print(double_to_hex(coeff), ",")
72//     print("],")
73//
74// print(f"static COEFFS: [[u64; 8]; 32] = [")
75// for i in range(0, 32):
76//     call_sollya_on_interval(i)
77//     coeffs = load_coefficients(f"coefficients.txt")
78//     print_coeffs(coeffs)
79// print("];")
80// ```
81static COEFFS: [[u64; 8]; 32] = [
82    [
83        0x3fec5bf891b4ef6b,
84        0x3fd2e7fb0bcdee7f,
85        0x3f842aa5641a200a,
86        0xbf752081ae81d16e,
87        0x3f2e1a191fb85592,
88        0xbf203a20ad500043,
89        0x3f861a864f719e76,
90        0xbfc79f68bad20bd1,
91    ],
92    [
93        0x3fec5bf891b4ef6b,
94        0x3fd2e7fb0bcdf45b,
95        0x3f842aa561f35512,
96        0xbf75207c8167ac1d,
97        0x3f2db4b119b4ce20,
98        0x3f20fa28737c4219,
99        0xbef38e74cca2219a,
100        0xbec5d70713fc621e,
101    ],
102    [
103        0x3fec5bf891b4ef30,
104        0x3fd2e7fb0bce1c0f,
105        0x3f842aa56138541f,
106        0xbf75207c6197eb7c,
107        0x3f2db4799120e074,
108        0x3f20fc28d915a6e9,
109        0xbef3ea5b479dc053,
110        0xbebbffe6df8ec372,
111    ],
112    [
113        0x3fec5bf891b4bf18,
114        0x3fd2e7fb0bde166f,
115        0x3f842aa53c721766,
116        0xbf7520796733bbec,
117        0x3f2db21eebf4144f,
118        0x3f210545cc78d0f0,
119        0xbef48ad7e4aa2d10,
120        0xbeb24a043ad31907,
121    ],
122    [
123        0x3fec5bf891ab16e9,
124        0x3fd2e7fb0dc7b919,
125        0x3f842aa29d8381e7,
126        0xbf7520592585d601,
127        0x3f2da30df1566e43,
128        0x3f212780ff325aa6,
129        0xbef5e98ea9819e42,
130        0xbe9849d52099dcb9,
131    ],
132    [
133        0x3fec5bf890ddfa8d,
134        0x3fd2e7fb28aab312,
135        0x3f842a8a461f0eb7,
136        0xbf751f93b2d27114,
137        0x3f2d66789eed5f95,
138        0x3f21818ff1832f50,
139        0xbef84264724049ef,
140        0x3e9df12b02e82a5a,
141    ],
142    [
143        0x3fec5bf887f64fa4,
144        0x3fd2e7fbfcc05f75,
145        0x3f842a02323e2099,
146        0xbf751c86d291ced6,
147        0x3f2cbd5653cde433,
148        0x3f223299b32b8583,
149        0xbefb7fc6e286cd94,
150        0x3eb49676cb3da393,
151    ],
152    [
153        0x3fec5bf84f8e2488,
154        0x3fd2e7ffe83d2974,
155        0x3f842821c5cc659c,
156        0xbf7514805a6196e3,
157        0x3f2b723680f64bb5,
158        0x3f233416dcfcd366,
159        0xbefefe55300afaa7,
160        0x3ebf0c475fb71e7a,
161    ],
162    [
163        0x3fec5bf7999e6afe,
164        0x3fd2e809c6d4caa7,
165        0x3f84247256be4a56,
166        0xbf750838db0c0cf5,
167        0x3f29e7e867267388,
168        0x3f24226adee5ce74,
169        0xbf00c0830af2bf01,
170        0x3ec26fb6b18e628b,
171    ],
172    [
173        0x3fec5bf801fc5ad5,
174        0x3fd2e80618e8941e,
175        0x3f84254c04b0b234,
176        0xbf7509d7cf351201,
177        0x3f2a01829944820c,
178        0x3f241d7bb0c7e2de,
179        0xbf00c2d844916d26,
180        0x3ec2817d39abc26b,
181    ],
182    [
183        0x3fec5c0938a12f13,
184        0x3fd2e7706c510d79,
185        0x3f8448392db86aae,
186        0xbf75526e9c6046f0,
187        0x3f2facd0bc0e7862,
188        0x3f21fc4093e1e6b7,
189        0xbefdf54af68ba968,
190        0x3ebfe348fc246c15,
191    ],
192    [
193        0x3fec5c6dcdadc5d8,
194        0x3fd2e495072afff3,
195        0x3f84d6f390564d4d,
196        0xbf764a7e85749c85,
197        0x3f37effb62caee80,
198        0x3f19cb39bc236ae6,
199        0xbef6d7035785e8f3,
200        0x3eb755aa2e58fc52,
201    ],
202    [
203        0x3fec5dd74381acff,
204        0x3fd2dbe68140f116,
205        0x3f86459e1acfda0f,
206        0xbf7865203923a03d,
207        0x3f43665053a48049,
208        0x3f0409e353b761ea,
209        0xbeeb0b00f567c9f8,
210        0x3eabc33000611b25,
211    ],
212    [
213        0x3fec6175431226d1,
214        0x3fd2c8dcbb0babcc,
215        0x3f88f5bfd61e5d2e,
216        0xbf7bc60de8dff620,
217        0x3f4d9b7076c7767c,
218        0xbf0106584fac3547,
219        0xbed0a56cd1030deb,
220        0x3e970ee11e7beb48,
221    ],
222    [
223        0x3fec68445d99a8e9,
224        0x3fd2a9d608dbfea2,
225        0x3f8cc072ddf22cb6,
226        0xbf7fe5f2efdc5f5c,
227        0x3f5431d1deff38bc,
228        0xbf197220e4a1dda8,
229        0x3ec9e2469e6c1c67,
230        0x3e4be72535d53d7b,
231    ],
232    [
233        0x3fec713c415bf088,
234        0x3fd28610e83aa38c,
235        0x3f9049ee1942f46b,
236        0xbf81c513d165d6fd,
237        0x3f585bc13e0fcaba,
238        0xbf22715362e30768,
239        0x3ede6bfa3c69e8e3,
240        0xbe852cd85f8dea5b,
241    ],
242    [
243        0x3fec770e08b47107,
244        0x3fd2716324b22047,
245        0x3f91460d403e6b9c,
246        0xbf829ab46375f10d,
247        0x3f5a0e7f00c76fb5,
248        0xbf2484890f2d7eeb,
249        0x3ee207b21bbd8496,
250        0xbe8bbee036671b6a,
251    ],
252    [
253        0x3fec6f4a2d01088d,
254        0x3fd288e494bc89b7,
255        0x3f905203788a2821,
256        0xbf81eab2727ce365,
257        0x3f58ddba75a3c100,
258        0xbf2347c9a317a175,
259        0x3ee099c93ce5f44f,
260        0xbe88e9f9c064f833,
261    ],
262    [
263        0x3fec4c9bbce50c7d,
264        0x3fd2e8175b0e1837,
265        0x3f89a2d1518c7a4c,
266        0xbf7f3fa91859127e,
267        0x3f55431c495b1077,
268        0xbf1fc1af665bb1f8,
269        0x3eda0f1d735195cb,
270        0xbe827b8d6fa224ed,
271    ],
272    [
273        0x3fec03cce39d7213,
274        0x3fd39c2316e290bf,
275        0x3f7b674438899313,
276        0xbf783644c88c71fb,
277        0x3f5047a3da485180,
278        0xbf1748b54f823d57,
279        0x3ed20c86d3302f22,
280        0xbe77f94cafe045a8,
281    ],
282    [
283        0x3feb913f0adf6c4b,
284        0x3fd49c4cedae09fc,
285        0xbf4a6dea9778f474,
286        0xbf7006dc4e6c8125,
287        0x3f461483c254fa5f,
288        0xbf0e75052760bf18,
289        0x3ec65425869bc096,
290        0xbe6bc2df9fbc0f82,
291    ],
292    [
293        0x3feafbeda3b7d400,
294        0x3fd5cb900ee1fb5e,
295        0xbf8228d16e87de3d,
296        0xbf6011d44e155bf5,
297        0x3f3993b736442257,
298        0xbf017c7ee5efa6ad,
299        0x3eb886e337d2e3c2,
300        0xbe5cba4b79e90043,
301    ],
302    [
303        0x3fea54849d309eba,
304        0x3fd701afa55e3d21,
305        0xbf90c72bb2e2799f,
306        0xbf33c92573294e34,
307        0x3f265284f7a6d53a,
308        0xbef09f09298ed1e8,
309        0x3ea7153a46cb2e27,
310        0xbe49ef6ec79265fd,
311    ],
312    [
313        0x3fe9b128df667870,
314        0x3fd816d295a867cb,
315        0xbf9713f11ea84a26,
316        0x3f4edcbdd63903bb,
317        0x3ef44f54fc7a6024,
318        0xbed45da547d2fcb8,
319        0x3e9049754d57a9a7,
320        0xbe32aba05ca26a69,
321    ],
322    [
323        0x3fe927f49edf4ace,
324        0x3fd8ecd207c6a7d1,
325        0xbf9b8cd215124008,
326        0x3f5cbab209dd389d,
327        0xbf12a8920ea6230f,
328        0x3eb442dfce60b0e2,
329        0x3e52494e415c7728,
330        0xbe09a1b1bbb9cee4,
331    ],
332    [
333        0x3fe8ca3d7437d06f,
334        0x3fd973c08b6d33fb,
335        0xbf9e272ca1fccc06,
336        0x3f61efd00e2016b6,
337        0xbf1e6dab18e9d45a,
338        0x3ed0b446e3469be1,
339        0xbe7503c584488bed,
340        0x3e069968660290a4,
341    ],
342    [
343        0x3fe8a1f4b154f663,
344        0x3fd9a9a8b81692d4,
345        0xbf9f1e9312dd4501,
346        0x3f632b4d20599404,
347        0xbf2119c1b5e43b24,
348        0x3ed42b9874284d56,
349        0xbe7c17cc1eef4b9d,
350        0x3e117f0a9057a689,
351    ],
352    [
353        0x3fe8b15bfcf78f33,
354        0x3fd99720c884ab33,
355        0xbf9ed2265979f5a6,
356        0x3f62d3c30432692b,
357        0xbf20a17346c37362,
358        0x3ed36538f2d21c31,
359        0xbe7aac6bb10f8b90,
360        0x3e1061d3a1737044,
361    ],
362    [
363        0x3fe8f479e98cb825,
364        0x3fd94ab3f8d0c80c,
365        0xbf9da7afe85abf94,
366        0x3f618fe28f71a3d4,
367        0xbf1df723b2a63e38,
368        0x3ed0d190252a7f7c,
369        0xbe7631fdd49272b0,
370        0x3e0a17567cab4a94,
371    ],
372    [
373        0x3fe9636d647b61c0,
374        0x3fd8d4aaba0e0212,
375        0xbf9bf904574e56ea,
376        0x3f5fb68684d8555d,
377        0xbf19d06f9cf17bbf,
378        0x3ecb92b9f0b8acf3,
379        0xbe7145bde0c499ae,
380        0x3e033cf1cb08ce4c,
381    ],
382    [
383        0x3fe9f4c3301b6d33,
384        0x3fd844100b4598b3,
385        0xbf9a0b94e19be990,
386        0x3f5c0ed55c70532f,
387        0xbf15a786c9e62b23,
388        0x3ec5e3f05a04f5c6,
389        0xbe69ea9db2e37883,
390        0x3dfb3e5ad2cd0fb2,
391    ],
392    [
393        0x3fea9f469c75536c,
394        0x3fd7a51b3d9eda10,
395        0xbf980f63a2cb486c,
396        0x3f5887f72a9f07e0,
397        0xbf11e4d454f2f994,
398        0x3ec113d0aed8bdef,
399        0xbe6311f84083acf4,
400        0x3df2e4dc2e50e3fa,
401    ],
402];
403
404/// Computes 1/erf(x)
405///
406/// Max ulp 0.5
407pub fn f_rerff(x: f32) -> f32 {
408    let x_u = x.to_bits();
409    let x_abs = x_u & 0x7fff_ffffu32;
410
411    if x == 0. {
412        return if x.is_sign_negative() {
413            f32::NEG_INFINITY
414        } else {
415            f32::INFINITY
416        };
417    }
418
419    if x_abs >= 0x4080_0000u32 {
420        static ONE: [f32; 2] = [1.0, -1.0];
421        static SMALL: [f32; 2] = [f32::from_bits(0xb3000000), f32::from_bits(0x33000000)];
422
423        let sign = x.is_sign_negative() as usize;
424
425        if x_abs >= 0x7f80_0000u32 {
426            return if x_abs > 0x7f80_0000 { x } else { ONE[sign] };
427        }
428
429        return ONE[sign] + SMALL[sign];
430    }
431
432    // Polynomial approximation see [COEFFS] for generation:
433    //   1/erf(x) ~ (c0 + c1 * x^2 + c2 * x^4 + ... + c7 * x^14) / x
434    let xd = x as f64;
435    let xsq = xd * xd;
436
437    const EIGHT: u32 = 3 << 23;
438    let idx = unsafe { f32::from_bits(x_abs.wrapping_add(EIGHT)).to_int_unchecked::<usize>() };
439
440    let c = COEFFS[idx];
441
442    let x4 = xsq * xsq;
443    let c0 = f_fmla(xsq, f64::from_bits(c[1]), f64::from_bits(c[0]));
444    let c1 = f_fmla(xsq, f64::from_bits(c[3]), f64::from_bits(c[2]));
445    let c2 = f_fmla(xsq, f64::from_bits(c[5]), f64::from_bits(c[4]));
446    let c3 = f_fmla(xsq, f64::from_bits(c[7]), f64::from_bits(c[6]));
447
448    let x8 = x4 * x4;
449    let p0 = f_fmla(x4, c1, c0);
450    let p1 = f_fmla(x4, c3, c2);
451
452    ((f_fmla(x8, p1, p0)) / xd) as f32
453}
454
455#[cfg(test)]
456mod tests {
457    use super::*;
458
459    #[test]
460    fn f_erff_test() {
461        assert!(f_rerff(f32::NAN).is_nan());
462        assert_eq!(f_rerff(0.0), f32::INFINITY);
463        assert_eq!(f_rerff(-0.0), f32::NEG_INFINITY);
464        assert_eq!(f_rerff(0.015255669), 58.096153);
465        assert_eq!(f_rerff(1.0), 1.1866608);
466        assert_eq!(f_rerff(0.5), 1.9212301);
467    }
468}