1use crate::common::f_fmla;
30
31#[inline]
35pub fn f_tanhf(x: f32) -> f32 {
36 let z = x as f64;
37 let t = x.to_bits();
38 let ux = t;
39 let e = ux.wrapping_shr(23) & 0xff;
40 if e == 0xff {
41 if ux << 9 != 0 {
42 return x + x;
43 } const IR: [f32; 2] = [1.0, -1.0];
45 return IR[ux.wrapping_shr(31) as usize]; }
47 if e < 115 {
48 if e < 102 {
50 if ux.wrapping_shl(1) == 0 {
52 return x;
53 }
54 #[cfg(any(
55 all(
56 any(target_arch = "x86", target_arch = "x86_64"),
57 target_feature = "fma"
58 ),
59 target_arch = "aarch64"
60 ))]
61 {
62 use crate::common::f_fmlaf;
63 let res = f_fmlaf(-x, x.abs(), x);
64 return res;
65 }
66 #[cfg(not(any(
67 all(
68 any(target_arch = "x86", target_arch = "x86_64"),
69 target_feature = "fma"
70 ),
71 target_arch = "aarch64"
72 )))]
73 {
74 let dx = x as f64;
75 let res = crate::common::f_fmla(-dx, dx.abs(), dx);
76 return res as f32;
77 }
78 }
79 #[cfg(any(
80 all(
81 any(target_arch = "x86", target_arch = "x86_64"),
82 target_feature = "fma"
83 ),
84 target_arch = "aarch64"
85 ))]
86 {
87 use crate::common::f_fmlaf;
88 let x2 = x * x;
89 return f_fmlaf(x, -f64::from_bits(0x3fd5555560000000) as f32 * x2, x);
90 }
91 #[cfg(not(any(
92 all(
93 any(target_arch = "x86", target_arch = "x86_64"),
94 target_feature = "fma"
95 ),
96 target_arch = "aarch64"
97 )))]
98 {
99 let dx = x as f64;
100 let x2 = dx * dx;
101 return f_fmla(dx, -f64::from_bits(0x3fd5555560000000) * x2, dx) as f32;
102 }
103 }
104 if ux.wrapping_shl(1) > (0x41102cb3u32 << 1) {
105 return f32::copysign(1.0, x) - f32::copysign(f64::from_bits(0x3e60000000000000) as f32, x);
106 }
107 let z2 = z * z;
108 let z4 = z2 * z2;
109 let z8 = z4 * z4;
110 const CN: [u64; 8] = [
111 0x3ff0000000000000,
112 0x3fc30877b8b72d33,
113 0x3f7694aa09ae9e5e,
114 0x3f14101377abb729,
115 0x3e9e0392b1db0018,
116 0x3e12533756e546f7,
117 0x3d6d62e5abe6ae8a,
118 0x3c9b06be534182de,
119 ];
120 const CD: [u64; 8] = [
121 0x3ff0000000000000,
122 0x3fded99131b0ebea,
123 0x3fa0d27ed6c95a69,
124 0x3f47cbdaca0e9fcc,
125 0x3edb4e60b892578e,
126 0x3e5a6f707c5c71ab,
127 0x3dc35a8b6e2cd94c,
128 0x3d0ca8230677aa01,
129 ];
130 let mut n0 = f_fmla(z2, f64::from_bits(CN[1]), f64::from_bits(CN[0]));
131 let n2 = f_fmla(z2, f64::from_bits(CN[3]), f64::from_bits(CN[2]));
132 let mut n4 = f_fmla(z2, f64::from_bits(CN[5]), f64::from_bits(CN[4]));
133 let n6 = f_fmla(z2, f64::from_bits(CN[7]), f64::from_bits(CN[6]));
134 n0 = f_fmla(z4, n2, n0);
135 n4 = f_fmla(z4, n6, n4);
136 n0 = f_fmla(z8, n4, n0);
137 let mut d0 = f_fmla(z2, f64::from_bits(CD[1]), f64::from_bits(CD[0]));
138 let d2 = f_fmla(z2, f64::from_bits(CD[3]), f64::from_bits(CD[2]));
139 let mut d4 = f_fmla(z2, f64::from_bits(CD[5]), f64::from_bits(CD[4]));
140 let d6 = f_fmla(z2, f64::from_bits(CD[7]), f64::from_bits(CD[6]));
141 d0 = f_fmla(z4, d2, d0);
142 d4 = f_fmla(z4, d6, d4);
143 d0 = f_fmla(z8, d4, d0);
144 let r = z * n0 / d0;
145 r as f32
146}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// Mid-range inputs, which go through the rational approximation path.
    #[test]
    fn test_tanhf() {
        assert_eq!(f_tanhf(-0.5), -0.46211717);
        assert_eq!(f_tanhf(0.5), 0.46211717);
        assert_eq!(f_tanhf(7.), 0.99999833);
    }

    /// NaN propagates, infinities map to ±1, and signed zeros come back
    /// with their sign intact.
    #[test]
    fn test_tanhf_special_values() {
        assert!(f_tanhf(f32::NAN).is_nan());
        assert_eq!(f_tanhf(f32::INFINITY), 1.0);
        assert_eq!(f_tanhf(f32::NEG_INFINITY), -1.0);
        // Compare bit patterns: `0.0 == -0.0` would hide a lost sign.
        assert_eq!(f_tanhf(0.0f32).to_bits(), 0.0f32.to_bits());
        assert_eq!(f_tanhf(-0.0f32).to_bits(), (-0.0f32).to_bits());
    }

    /// Inputs below 2^-12 in magnitude: the correction to `x` is far smaller
    /// than half an ulp for these values, so the result equals the input.
    #[test]
    fn test_tanhf_tiny_inputs() {
        assert_eq!(f_tanhf(1e-10), 1e-10);
        assert_eq!(f_tanhf(-1e-10), -1e-10);
        assert_eq!(f_tanhf(1e-5), 1e-5);
        assert_eq!(f_tanhf(-1e-5), -1e-5);
    }

    /// Inputs past the saturation cut-off (about 9.01): `1 - 2^-25` rounds
    /// to `1` under the default round-to-nearest-even mode.
    #[test]
    fn test_tanhf_saturation() {
        assert_eq!(f_tanhf(10.0), 1.0);
        assert_eq!(f_tanhf(-10.0), -1.0);
        assert_eq!(f_tanhf(f32::MAX), 1.0);
        assert_eq!(f_tanhf(f32::MIN), -1.0);
    }
}