pxfm/hyperbolic/
atanh.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::hyperbolic::acosh::{
32    ACOSH_ASINH_B, ACOSH_ASINH_LL, ACOSH_ASINH_R1, ACOSH_ASINH_R2, ACOSH_ASINH_REFINE_T2,
33    ACOSH_ASINH_REFINE_T4, ACOSH_SINH_REFINE_T1, ACOSH_SINH_REFINE_T3, lpoly_xd_generic,
34};
35
36static ATANH_L1: [(u64, u64); 33] = [
37    (0x0000000000000000, 0x0000000000000000),
38    (0xbe4532c1269e2038, 0x3f862e5000000000),
39    (0x3e4ce42d81b54e84, 0x3f962e3c00000000),
40    (0xbe525826f815ec3d, 0x3fa0a2ac00000000),
41    (0x3e50db1b1e7cee11, 0x3fa62e4a00000000),
42    (0xbe51f3a8c6c95003, 0x3fabb9dc00000000),
43    (0xbe5774cd4fb8c30d, 0x3fb0a2b200000000),
44    (0x3e2452e56c030a0a, 0x3fb3687f00000000),
45    (0x3e36b63c4966a79a, 0x3fb62e4100000000),
46    (0xbe3b20a21ccb525e, 0x3fb8f40a00000000),
47    (0x3e54006cfb3d8f85, 0x3fbbb9d100000000),
48    (0xbe5cdb026b310c41, 0x3fbe7f9b00000000),
49    (0xbe569124fdc0f16d, 0x3fc0a2b080000000),
50    (0xbe5084656cdc2727, 0x3fc2059580000000),
51    (0xbe5376fa8b0357fd, 0x3fc3687c00000000),
52    (0x3e3e56ae55a47b4a, 0x3fc4cb5e80000000),
53    (0x3e5070ff8834eeb4, 0x3fc62e4400000000),
54    (0x3e5623516109f4fe, 0x3fc7912900000000),
55    (0xbe2ec656b95fbdac, 0x3fc8f40b00000000),
56    (0x3e3f0ca2e729f510, 0x3fca56ed80000000),
57    (0xbe57d260a858354a, 0x3fcbb9d680000000),
58    (0x3e4e7279075503d3, 0x3fcd1cb900000000),
59    (0x3e439e1a0a503873, 0x3fce7f9d00000000),
60    (0x3e5cd86d7b87c3d6, 0x3fcfe27d80000000),
61    (0x3e5060ab88de341e, 0x3fd0a2b240000000),
62    (0x3e320a860d3f9390, 0x3fd1542440000000),
63    (0xbe4dacee95fc2f10, 0x3fd2059740000000),
64    (0x3e545de3a86e0aca, 0x3fd2b70700000000),
65    (0x3e4c164cbfb991af, 0x3fd3687b00000000),
66    (0x3e5d3f66b24225ef, 0x3fd419ec40000000),
67    (0x3e5fc023efa144ba, 0x3fd4cb5f80000000),
68    (0x3e3086a8af6f26c0, 0x3fd57cd280000000),
69    (0xbe105c610ca86c39, 0x3fd62e4300000000),
70];
71
72static ATANH_L2: [(u64, u64); 33] = [
73    (0x0000000000000000, 0x0000000000000000),
74    (0xbe337e152a129e4e, 0x3f36320000000000),
75    (0xbe53f6c916b8be9c, 0x3f46300000000000),
76    (0x3e520505936739d5, 0x3f50a24000000000),
77    (0xbe523e2e8cb541ba, 0x3f562dc000000000),
78    (0xbdfacb7983ac4f5e, 0x3f5bba0000000000),
79    (0x3e36f7c7689c63ae, 0x3f60a2a000000000),
80    (0x3e1f5ca695b4c58b, 0x3f6368c000000000),
81    (0xbe4c6c18bd953226, 0x3f662e6000000000),
82    (0x3e57a516c34846bd, 0x3f68f46000000000),
83    (0xbe4f3b83dd8b8530, 0x3f6bba0000000000),
84    (0xbe0c3459046e4e57, 0x3f6e800000000000),
85    (0x3d9b5c7e34cb79f6, 0x3f70a2c000000000),
86    (0xbe42487e9af9a692, 0x3f7205c000000000),
87    (0x3e5f21bbc4ad79ce, 0x3f73687000000000),
88    (0xbe2550ffc857b731, 0x3f74cb7000000000),
89    (0x3e487458ec1b7b34, 0x3f762e2000000000),
90    (0x3e5103d4fe83ee81, 0x3f77911000000000),
91    (0x3e4810483d3b398c, 0x3f78f44000000000),
92    (0xbe42085cb340608e, 0x3f7a573000000000),
93    (0x3e512698a119c42f, 0x3f7bb9d000000000),
94    (0xbe5edb8c172b4c33, 0x3f7d1cc000000000),
95    (0xbe58b55b87a5e238, 0x3f7e7fe000000000),
96    (0x3e5be5e17763f78a, 0x3f7fe2b000000000),
97    (0xbe1c2d496790073e, 0x3f80a2a800000000),
98    (0x3e56542f523abeec, 0x3f81541000000000),
99    (0xbe5b7fdbe5b193f8, 0x3f8205a000000000),
100    (0x3e5fa4d42fe30c7c, 0x3f82b70000000000),
101    (0x3e50d46ad04adc86, 0x3f83688800000000),
102    (0xbe51c22d02d17c4c, 0x3f8419f000000000),
103    (0x3e1a7d1e330dccce, 0x3f84cb7000000000),
104    (0x3e0187025e656ba3, 0x3f857cd000000000),
105    (0xbe4532c1269e2038, 0x3f862e5000000000),
106];
107
108#[cold]
109fn as_atanh_zero(x: f64) -> f64 {
110    static CH: [(u64, u64); 13] = [
111        (0x3c75555555555555, 0x3fd5555555555555),
112        (0xbc6999999999611c, 0x3fc999999999999a),
113        (0x3c62492490f76b25, 0x3fc2492492492492),
114        (0x3c5c71cd5c38a112, 0x3fbc71c71c71c71c),
115        (0xbc47556c4165f4ca, 0x3fb745d1745d1746),
116        (0xbc4b893c3b36052e, 0x3fb3b13b13b13b14),
117        (0x3c44e1afd723ed1f, 0x3fb1111111111105),
118        (0xbc4f86ea96fb1435, 0x3fae1e1e1e1e2678),
119        (0x3c31e51a6e54fde9, 0x3faaf286bc9f90cc),
120        (0xbc2ab913de95c3bf, 0x3fa8618618c779b6),
121        (0x3c4632e747641b12, 0x3fa642c84aa383eb),
122        (0xbc30c9617e7bcff2, 0x3fa47ae2d205013c),
123        (0x3c23adb3e2b7f35e, 0x3fa2f664d60473f9),
124    ];
125
126    const CL: [u64; 5] = [
127        0x3fa1a9a91fd692af,
128        0x3fa06dfbb35e7f44,
129        0x3fa037bed4d7588f,
130        0x3f95aca6d6d720d6,
131        0x3fa99ea5700d53a5,
132    ];
133
134    let dx2 = DoubleDouble::from_exact_mult(x, x);
135
136    let yw0 = f_fmla(dx2.hi, f64::from_bits(CL[4]), f64::from_bits(CL[3]));
137    let yw1 = f_fmla(dx2.hi, yw0, f64::from_bits(CL[2]));
138    let yw2 = f_fmla(dx2.hi, yw1, f64::from_bits(CL[1]));
139
140    let y2 = dx2.hi * f_fmla(dx2.hi, yw2, f64::from_bits(CL[0]));
141
142    let mut y1 = lpoly_xd_generic(dx2, CH, y2);
143    y1 = DoubleDouble::quick_mult_f64(y1, x);
144    y1 = DoubleDouble::quick_mult(y1, dx2);
145
146    let y0 = DoubleDouble::from_exact_add(x, y1.hi);
147    let mut p = DoubleDouble::from_exact_add(y0.lo, y1.lo);
148
149    let mut t = p.hi.to_bits();
150    if (t & 0x000fffffffffffff) == 0 {
151        let w = p.lo.to_bits();
152        if ((w ^ t) >> 63) != 0 {
153            t = t.wrapping_sub(1);
154        } else {
155            t = t.wrapping_add(1);
156        }
157        p.hi = f64::from_bits(t);
158    }
159    y0.hi + p.hi
160}
161
162#[cold]
163fn atanh_refine(x: f64, a: f64, z: DoubleDouble) -> f64 {
164    let mut t = z.hi.to_bits();
165    let ex: i32 = (t >> 52) as i32;
166    let e = ex.wrapping_sub(0x3ff);
167    t &= 0x000fffffffffffff;
168    t |= 0x3ffu64 << 52;
169    let ed = e as f64;
170    let v = (a - ed + f64::from_bits(0x3ff0000800000000)).to_bits();
171    let i = (v.wrapping_sub(0x3ffu64 << 52)) >> (52 - 16);
172    let i1: i32 = ((i >> 12) as i32) & 0x1f;
173    let i2 = (i >> 8) & 0xf;
174    let i3 = (i >> 4) & 0xf;
175    let i4 = i & 0xf;
176
177    const L20: f64 = f64::from_bits(0x3fd62e42fefa3a00);
178    const L21: f64 = f64::from_bits(0xbcd0ca86c3898d00);
179    const L22: f64 = f64::from_bits(0x397f97b57a079a00);
180
181    let el2 = L22 * ed;
182    let el1 = L21 * ed;
183    let el0 = L20 * ed;
184
185    let ll0i1 = ACOSH_ASINH_LL[0][i1 as usize];
186    let ll1i2 = ACOSH_ASINH_LL[1][i2 as usize];
187    let ll2i3 = ACOSH_ASINH_LL[2][i3 as usize];
188    let ll3i4 = ACOSH_ASINH_LL[3][i4 as usize];
189
190    let mut dl0 = f64::from_bits(ll0i1.0)
191        + f64::from_bits(ll1i2.0)
192        + (f64::from_bits(ll2i3.0) + f64::from_bits(ll3i4.0));
193    let dl1 = f64::from_bits(ll0i1.1)
194        + f64::from_bits(ll1i2.1)
195        + (f64::from_bits(ll2i3.1) + f64::from_bits(ll3i4.1));
196    let dl2 = f64::from_bits(ll0i1.2)
197        + f64::from_bits(ll1i2.2)
198        + (f64::from_bits(ll2i3.2) + f64::from_bits(ll3i4.2));
199    dl0 += el0;
200
201    let t12 = f64::from_bits(ACOSH_SINH_REFINE_T1[i1 as usize])
202        * f64::from_bits(ACOSH_ASINH_REFINE_T2[i2 as usize]);
203    let t34 = f64::from_bits(ACOSH_SINH_REFINE_T3[i3 as usize])
204        * f64::from_bits(ACOSH_ASINH_REFINE_T4[i4 as usize]);
205    let th = t12 * t34;
206    let tl = f_fmla(t12, t34, -th);
207    let dh = th * f64::from_bits(t);
208    let dl = f_fmla(th, f64::from_bits(t), -dh);
209    let sh = tl * f64::from_bits(t);
210    let sl = f_fmla(tl, f64::from_bits(t), -sh);
211    let mut dx = DoubleDouble::from_exact_add(dh - 1., dl);
212
213    t = z.lo.to_bits();
214    t = t.wrapping_sub(((e as i64) << 52) as u64);
215    dx.lo += th * f64::from_bits(t);
216
217    dx = DoubleDouble::add(dx, DoubleDouble::new(sl, sh));
218    const CL: [u64; 3] = [0xbfc0000000000000, 0x3fb9999999a0754f, 0xbfb55555555c3157];
219
220    let slw0 = f_fmla(dx.hi, f64::from_bits(CL[2]), f64::from_bits(CL[1]));
221
222    let sl = dx.hi * f_fmla(dx.hi, slw0, f64::from_bits(CL[0]));
223
224    static CH: [(u64, u64); 3] = [
225        (0x39024b67ee516e3b, 0x3fe0000000000000),
226        (0xb91932ce43199a8d, 0xbfd0000000000000),
227        (0x3c655540c15cf91f, 0x3fc5555555555555),
228    ];
229
230    let mut s = lpoly_xd_generic(dx, CH, sl);
231    s = DoubleDouble::mult(dx, s);
232    s = DoubleDouble::add(s, DoubleDouble::new(el2, el1));
233    s = DoubleDouble::add(s, DoubleDouble::new(dl2, dl1));
234    let mut v02 = DoubleDouble::from_exact_add(dl0, s.hi);
235    let mut v12 = DoubleDouble::from_exact_add(v02.lo, s.lo);
236    t = v12.hi.to_bits();
237    if (t & 0x000fffffffffffff) == 0 {
238        let w = v12.lo.to_bits();
239        if ((w ^ t) >> 63) != 0 {
240            t = t.wrapping_sub(1);
241        } else {
242            t = t.wrapping_add(1);
243        }
244        v12.hi = f64::from_bits(t);
245    }
246    v02.hi *= f64::copysign(1., x);
247    v12.hi *= f64::copysign(1., x);
248
249    v02.hi + v12.hi
250}
251
252/// Hyperbolic arc tangent
253///
254/// Max ULP 0.5
255pub fn f_atanh(x: f64) -> f64 {
256    let ax = x.abs();
257    let ix = ax.to_bits();
258    let aix = ix;
259    if aix >= 0x3ff0000000000000u64 {
260        // |x| >= 1
261        if aix == 0x3ff0000000000000u64 {
262            // |x| = 1
263            return f64::copysign(1., x) / 0.0;
264        }
265        if aix > 0x7ff0000000000000u64 {
266            return x + x;
267        } // nan
268        return (-1.0f64).sqrt();
269    }
270
271    if aix < 0x3fd0000000000000u64 {
272        // |x| < 0x1p-2
273        // atanh(x) rounds to x to nearest for |x| < 0x1.d12ed0af1a27fp-27
274        if aix < 0x3e4d12ed0af1a27fu64 {
275            // |x| < 0x1.d12ed0af1a27fp-27
276            /* We have underflow exactly when 0 < |x| < 2^-1022:
277            for RNDU, atanh(2^-1022-2^-1074) would round to 2^-1022-2^-1075
278            with unbounded exponent range */
279            return f_fmla(x, f64::from_bits(0x3c80000000000000), x);
280        }
281        let x2 = x * x;
282        const C: [u64; 9] = [
283            0x3fc999999999999a,
284            0x3fc2492492492244,
285            0x3fbc71c71c79715f,
286            0x3fb745d16f777723,
287            0x3fb3b13ca4174634,
288            0x3fb110c9724989bd,
289            0x3fae2d17608a5b2e,
290            0x3faa0b56308cba0b,
291            0x3fafb6341208ad2e,
292        ];
293        let dx2: f64 = dd_fmla(x, x, -x2);
294
295        let x4 = x2 * x2;
296        let x3 = x2 * x;
297        let x8 = x4 * x4;
298
299        let zdx3: f64 = dd_fmla(x2, x, -x3);
300
301        let dx3 = f_fmla(dx2, x, zdx3);
302
303        let pw0 = f_fmla(x2, f64::from_bits(C[7]), f64::from_bits(C[6]));
304        let pw1 = f_fmla(x2, f64::from_bits(C[5]), f64::from_bits(C[4]));
305        let pw2 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
306        let pw3 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
307
308        let pw4 = f_fmla(x4, pw0, pw1);
309        let pw5 = f_fmla(x8, f64::from_bits(C[8]), pw4);
310        let pw6 = f_fmla(x4, pw2, pw3);
311
312        let p = f_fmla(x8, pw5, pw6);
313        let t = f64::from_bits(0x3c75555555555555) + x2 * p;
314        let mut p = DoubleDouble::from_exact_add(f64::from_bits(0x3fd5555555555555), t);
315        p = DoubleDouble::mult(p, DoubleDouble::new(dx3, x3));
316        let z0 = DoubleDouble::from_exact_add(x, p.hi);
317        p.lo += z0.lo;
318        p.hi = z0.hi;
319        let eps = x * f_fmla(
320            x4,
321            f64::from_bits(0x3cad000000000000),
322            f64::from_bits(0x3980000000000000),
323        );
324        let lb = p.hi + (p.lo - eps);
325        let ub = p.hi + (p.lo + eps);
326        if lb == ub {
327            return lb;
328        }
329        return as_atanh_zero(x);
330    }
331
332    let p = DoubleDouble::from_exact_add(1.0, ax);
333    let q = DoubleDouble::from_exact_sub(1.0, ax);
334    let iqh = 1.0 / q.hi;
335    let th = p.hi * iqh;
336    let tl = dd_fmla(p.hi, iqh, -th) + (p.lo + p.hi * (dd_fmla(-q.hi, iqh, 1.) - q.lo * iqh)) * iqh;
337
338    const C: [u64; 5] = [
339        0xbff0000000000000,
340        0x3ff5555555555530,
341        0xbfffffffffffffa0,
342        0x40099999e33a6366,
343        0xc01555559ef9525f,
344    ];
345
346    let mut t = th.to_bits();
347    let ex: i32 = (t >> 52) as i32;
348    let e = ex.wrapping_sub(0x3ff);
349    t &= 0x000fffffffffffff;
350    let ed = e as f64;
351    let i = t >> (52 - 5);
352    let d: i64 = (t & 0x00007fffffffffff) as i64;
353    let b_i = ACOSH_ASINH_B[i as usize];
354    let j: u64 = t
355        .wrapping_add((b_i[0] as u64).wrapping_shl(33))
356        .wrapping_add((b_i[1] as i64).wrapping_mul(d >> 16) as u64)
357        >> (52 - 10);
358    t |= 0x3ffu64 << 52;
359    let i1: i32 = (j >> 5) as i32;
360    let i2 = j & 0x1f;
361    let r = (0.5 * f64::from_bits(ACOSH_ASINH_R1[i1 as usize]))
362        * f64::from_bits(ACOSH_ASINH_R2[i2 as usize]);
363    let dx = dd_fmla(r, f64::from_bits(t), -0.5);
364    let dx2 = dx * dx;
365    let rx = r * f64::from_bits(t);
366    let dxl = dd_fmla(r, f64::from_bits(t), -rx);
367
368    let fw0 = f_fmla(dx, f64::from_bits(C[3]), f64::from_bits(C[2]));
369    let fw2 = f_fmla(dx, f64::from_bits(C[1]), f64::from_bits(C[0]));
370    let fw1 = f_fmla(dx2, f64::from_bits(C[4]), fw0);
371
372    let f = dx2 * f_fmla(dx2, fw1, fw2);
373    const L2H: f64 = f64::from_bits(0x3fd62e42fefa3a00);
374    const L2L: f64 = f64::from_bits(0xbcd0ca86c3898d00);
375    let l1i1 = ATANH_L1[i1 as usize];
376    let l1i2 = ATANH_L2[i2 as usize];
377    let lh = f_fmla(L2H, ed, f64::from_bits(l1i1.1) + f64::from_bits(l1i2.1));
378    let mut zl = DoubleDouble::from_exact_add(lh, rx - 0.5);
379
380    zl.lo += f_fmla(L2L, ed, f64::from_bits(l1i1.0) + f64::from_bits(l1i2.0)) + dxl + 0.5 * tl / th;
381    zl.lo += f;
382    zl.hi *= f64::copysign(1., x);
383    zl.lo *= f64::copysign(1., x);
384    let eps = 31e-24 + dx2 * f64::from_bits(0x3ce0000000000000);
385    let lb = zl.hi + (zl.lo - eps);
386    let ub = zl.hi + (zl.lo + eps);
387    if lb == ub {
388        return lb;
389    }
390    let t = DoubleDouble::from_exact_add(th, tl);
391    atanh_refine(
392        x,
393        f64::from_bits(0x40071547652b82fe) * (zl.hi + zl.lo).abs(),
394        t,
395    )
396}
397
398#[cfg(test)]
399mod tests {
400    use super::*;
401
402    #[test]
403    fn test_atanh() {
404        assert_eq!(f_atanh(-0.5000824928283691), -0.5494161408216048);
405        assert_eq!(f_atanh(-0.24218760943040252), -0.24709672810738792);
406    }
407}