pxfm/hyperbolic/
tanh.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::common::{dd_fmla, f_fmla};
30use crate::double_double::DoubleDouble;
31use crate::exponents::{EXP_REDUCE_T0, EXP_REDUCE_T1};
32use crate::hyperbolic::acosh::lpoly_xd_generic;
33use crate::hyperbolic::sinh::hyperbolic_exp_accurate;
34
35#[cold]
36fn as_tanh_zero(x: f64) -> f64 {
37    static CH: [(u64, u64); 10] = [
38        (0xbc75555555555555, 0xbfd5555555555555),
39        (0x3c41111111110916, 0x3fc1111111111111),
40        (0x3c47917917a46f2c, 0xbfaba1ba1ba1ba1c),
41        (0xbc09a52a06f1e599, 0x3f9664f4882c10fa),
42        (0x3c2c297394c24e38, 0xbf8226e355e6c23d),
43        (0xbc0311087e5b1526, 0x3f6d6d3d0e157de0),
44        (0xbbe2868cde54ea0c, 0xbf57da36452b75e1),
45        (0x3bd2cd8fc406c3f7, 0x3f4355824803667b),
46        (0x3b9da22861b4ca80, 0xbf2f57d7734c821d),
47        (0xbbb0831108273a74, 0x3f1967e18ad3facf),
48    ];
49    let dx2 = DoubleDouble::from_exact_mult(x, x);
50    const CL: [u64; 6] = [
51        0xbf0497d8e6462927,
52        0x3ef0b1318c243bd7,
53        0xbedb0f2935e9a120,
54        0x3ec5e9444536e654,
55        0xbeb174ff2a31908c,
56        0x3e9749698c8d338d,
57    ];
58
59    let yw0 = f_fmla(dx2.hi, f64::from_bits(CL[5]), f64::from_bits(CL[4]));
60    let yw1 = f_fmla(dx2.hi, yw0, f64::from_bits(CL[3]));
61    let yw2 = f_fmla(dx2.hi, yw1, f64::from_bits(CL[2]));
62    let yw3 = f_fmla(dx2.hi, yw2, f64::from_bits(CL[1]));
63    let yw4 = f_fmla(dx2.hi, yw3, f64::from_bits(CL[0]));
64
65    let y2 = dx2.hi * yw4;
66
67    let mut y1 = lpoly_xd_generic(dx2, CH, y2);
68    y1 = DoubleDouble::quick_mult_f64(y1, x);
69    y1 = DoubleDouble::quick_mult(y1, dx2); // y2 = y1.l
70    let y0 = DoubleDouble::from_exact_add(x, y1.hi); // y0 = y0.hi
71    let mut p = DoubleDouble::from_exact_add(y0.lo, y1.lo);
72    let mut t = p.hi.to_bits();
73    if (t & 0x000fffffffffffff) == 0 {
74        let w = p.lo.to_bits();
75        if ((w ^ t) >> 63) != 0 {
76            t = t.wrapping_sub(1);
77        } else {
78            t = t.wrapping_add(1);
79        }
80        p.hi = f64::from_bits(t);
81    }
82    y0.hi + p.hi
83}
84
85/// Hyperbolic tan
86///
87/// Max ULP 0.5
88pub fn f_tanh(x: f64) -> f64 {
89    /*
90      The function tanh(x) is approximated by minimax polynomial for
91      |x|<0.25.  For other values we use this identity tanh(|x|) = 1 -
92      2*exp(-2*|x|)/(1 + exp(-2*|x|)).  For large |x|>3.683 the term
93      2*exp(-2*|x|)/(1 + exp(-2*|x|)) becomes small and we can use less
94      precise formula for exponent.
95    */
96    let ax = x.abs();
97    let ix = ax.to_bits();
98    let aix: u64 = ix;
99    /* for |x| >= 0x1.30fc1931f09cap+4, tanh(x) rounds to +1 or -1 to nearest,
100    this avoid a spurious overflow in the computation of v0 below */
101    if aix >= 0x40330fc1931f09cau64 {
102        if aix > 0x7ff0000000000000u64 {
103            return x + x;
104        } // nan
105        let f = f64::copysign(1.0, x);
106        if aix == 0x7ff0000000000000u64 {
107            return f;
108        }
109        let df = f64::copysign(f64::from_bits(0x3c80000000000000), x);
110        return f - df;
111    }
112    const S: f64 = f64::from_bits(0xc0c71547652b82fe);
113    let v0 = dd_fmla(ax, S, f64::from_bits(0x4188000004000000));
114    let jt = v0.to_bits();
115    let v = v0.to_bits() & 0xfffffffff8000000;
116    let t = f64::from_bits(v) - f64::from_bits(0x4188000000000000);
117
118    let i1: i64 = ((jt >> 27) & 0x3f) as i64;
119    let i0 = (jt >> 33) & 0x3f;
120    let ie = ((jt.wrapping_shl(13)) >> 52) as i64;
121    let sp = (1023i64.wrapping_add(ie) as u64).wrapping_shl(52);
122    const CH: [u64; 4] = [
123        0x4000000000000000,
124        0x4000000000000000,
125        0x3ff55555557e54ff,
126        0x3fe55555553a12f4,
127    ];
128    let t0h = f64::from_bits(EXP_REDUCE_T0[i0 as usize].1);
129    let t1h = f64::from_bits(EXP_REDUCE_T1[i1 as usize].1);
130    let mut th = t0h * t1h;
131    let mut tl;
132    if aix < 0x400d76c8b4395810u64 {
133        // |x| ~< 3.683
134        if aix < 0x3fd0000000000000u64 {
135            // |x| < 0x1p-2
136            if aix < 0x3e10000000000000u64 {
137                // |x| < 0x1p-30
138                if aix < 0x3df0000000000000u64 {
139                    // |x| < 0x1p-32
140                    if aix == 0 {
141                        return x;
142                    }
143                    /* We have underflow when 0 < |x| < 2^-1022 or when |x| = 2^-1022
144                    and rounding towards zero. */
145                    let res = dd_fmla(x, f64::from_bits(0xbc80000000000000), x);
146                    return res;
147                }
148                let x3 = x * x * x;
149                return x - x3 / 3.;
150            }
151
152            const C: [u64; 8] = [
153                0xbfd5555555555554,
154                0x3fc1111111110d61,
155                0xbfaba1ba1b983d8b,
156                0x3f9664f4820e99f0,
157                0xbf8226e11e4ac7cf,
158                0x3f6d6c4ab70668b6,
159                0xbf57bbecb57ce996,
160                0x3f41451443697dd8,
161            ];
162            let x2 = x * x;
163            let x3 = x2 * x;
164            let x4 = x2 * x2;
165            let x8 = x4 * x4;
166
167            let p1w0 = f_fmla(x2, f64::from_bits(C[7]), f64::from_bits(C[6]));
168            let p1w1 = f_fmla(x2, f64::from_bits(C[5]), f64::from_bits(C[4]));
169
170            let p0w0 = f_fmla(x2, f64::from_bits(C[3]), f64::from_bits(C[2]));
171            let p0w1 = f_fmla(x2, f64::from_bits(C[1]), f64::from_bits(C[0]));
172
173            let p1 = f_fmla(x4, p1w0, p1w1);
174            let mut p0 = f_fmla(x4, p0w0, p0w1);
175            p0 += x8 * p1;
176            p0 *= x3;
177            let r = DoubleDouble::from_exact_add(x, p0);
178            let e = x3 * f64::from_bits(0x3cba000000000000);
179            let lb = r.hi + (r.lo - e);
180            let ub = r.hi + (r.lo + e);
181            if lb == ub {
182                return lb;
183            }
184            return as_tanh_zero(x);
185        }
186
187        let t0l = f64::from_bits(EXP_REDUCE_T0[i0 as usize].0);
188        let t1l = f64::from_bits(EXP_REDUCE_T1[i1 as usize].0);
189        tl = f_fmla(t0h, t1l, t1h * t0l) + dd_fmla(t0h, t1h, -th);
190        th *= f64::from_bits(sp);
191        tl *= f64::from_bits(sp);
192        const L2H: f64 = f64::from_bits(0xbf162e42ff000000);
193        const L2L: f64 = f64::from_bits(0xbcf718432a1b0e26);
194        let dx = f_fmla(-L2L, t, f_fmla(L2H, t, -ax));
195        let dx2 = dx * dx;
196
197        let pw0 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
198        let pw1 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
199
200        let p = dx * f_fmla(dx2, pw0, pw1);
201        let mut rh = th;
202        let mut rl = tl + rh * p;
203        let mut r = DoubleDouble::from_exact_add(rh, rl);
204
205        let ph = r.hi;
206        let pl = r.lo;
207        let mut qh = r.hi;
208        let mut ql = r.lo;
209        let qq = DoubleDouble::from_exact_add(1.0, qh);
210        qh = qq.hi;
211        ql += qq.lo;
212
213        let rqh = 1.0 / qh;
214        let rql = f_fmla(ql, rqh, dd_fmla(rqh, qh, -1.)) * -rqh;
215        let p = DoubleDouble::mult(DoubleDouble::new(pl, ph), DoubleDouble::new(rql, rqh));
216
217        let e = r.hi * f64::from_bits(0x3c10000000000000);
218        r = DoubleDouble::from_exact_sub(0.5, p.hi);
219        r.lo -= p.lo;
220        rh = r.hi * f64::copysign(2., x);
221        rl = r.lo * f64::copysign(2., x);
222        let lb = rh + (rl - e);
223        let ub = rh + (rl + e);
224        if lb == ub {
225            return lb;
226        }
227    } else {
228        const L2: f64 = f64::from_bits(0xbf162e42fefa39ef);
229        let dx = dd_fmla(L2, t, -ax);
230        let dx2 = dx * dx;
231
232        let pw0 = f_fmla(dx, f64::from_bits(CH[3]), f64::from_bits(CH[2]));
233        let pw1 = f_fmla(dx, f64::from_bits(CH[1]), f64::from_bits(CH[0]));
234
235        let p = dx * f_fmla(dx2, pw0, pw1);
236        let mut rh = th * f64::from_bits(sp);
237        rh += (p + ((2. * f64::from_bits(0x3c83000000000000)) * ax)) * rh;
238        let e = rh * f64::from_bits(0x3ce1000000000000);
239        rh = (2. * rh) / (1. + rh);
240        let one = f64::copysign(1., x);
241        rh = f64::copysign(rh, x);
242        let lb = one - (rh + e);
243        let ub = one - (rh - e);
244        if lb == ub {
245            return lb;
246        }
247
248        let t0l = f64::from_bits(EXP_REDUCE_T0[i0 as usize].0);
249        let t1l = f64::from_bits(EXP_REDUCE_T1[i1 as usize].0);
250        tl = f_fmla(t0h, t1l, t1h * t0l) + dd_fmla(t0h, t1h, -th);
251        th *= f64::from_bits(sp);
252        tl *= f64::from_bits(sp);
253    }
254
255    let mut r = hyperbolic_exp_accurate(-2. * ax, t, DoubleDouble::new(tl, th));
256    let mut q = DoubleDouble::from_exact_add(1.0, r.hi);
257    q.lo += r.lo;
258    q = DoubleDouble::from_exact_add(q.hi, q.lo);
259    let rqh = 1. / q.hi;
260    let rql = f_fmla(q.lo, rqh, dd_fmla(rqh, q.hi, -1.)) * -rqh;
261    let p = DoubleDouble::mult(r, DoubleDouble::new(rql, rqh));
262    r = DoubleDouble::from_exact_sub(0.5, p.hi);
263    r.lo -= p.lo;
264    r = DoubleDouble::from_exact_add(r.hi, r.lo);
265    f_fmla(f64::copysign(2., x), r.hi, f64::copysign(2., x) * r.lo)
266}
267
268#[cfg(test)]
269mod tests {
270    use super::*;
271
272    #[test]
273    fn test_tanh() {
274        assert_eq!(f_tanh(4.799980150947863), 0.9998645463239773);
275        assert_eq!(f_tanh(2.549980150947863), 0.9878799187977153);
276    }
277}