1use crate::common::f_fmla;
30use crate::rounding::CpuRoundTiesEven;
31use std::hint::black_box;
32
/// Raw `f64` bit patterns of `0.5 * 2^(i / 32)` for `i` in `0..32`.
///
/// Entry 0 is `0.5` and entry 16 is `sqrt(2) / 2`. The values are kept as
/// `u64` so that callers can adjust the exponent field with integer
/// arithmetic before converting with `f64::from_bits`.
static TB: [u64; 32] = [
    // i = 0..8
    0x3fe0_0000_0000_0000,
    0x3fe0_59b0_d315_8574,
    0x3fe0_b558_6cf9_890f,
    0x3fe1_1301_d012_5b51,
    0x3fe1_72b8_3c7d_517b,
    0x3fe1_d487_3168_b9aa,
    0x3fe2_387a_6e75_6238,
    0x3fe2_9e9d_f51f_dee1,
    // i = 8..16
    0x3fe3_06fe_0a31_b715,
    0x3fe3_71a7_373a_a9cb,
    0x3fe3_dea6_4c12_3422,
    0x3fe4_4e08_6061_892d,
    0x3fe4_bfda_d536_2a27,
    0x3fe5_342b_569d_4f82,
    0x3fe5_ab07_dd48_5429,
    0x3fe6_247e_b03a_5585,
    // i = 16..24
    0x3fe6_a09e_667f_3bcd,
    0x3fe7_1f75_e8ec_5f74,
    0x3fe7_a114_73eb_0187,
    0x3fe8_2589_994c_ce13,
    0x3fe8_ace5_422a_a0db,
    0x3fe9_3737_b0cd_c5e5,
    0x3fe9_c491_82a3_f090,
    0x3fea_5503_b23e_255d,
    // i = 24..32
    0x3fea_e89f_995a_d3ad,
    0x3feb_7f76_f2fb_5e47,
    0x3fec_199b_dd85_529c,
    0x3fec_b720_dcef_9069,
    0x3fed_5818_dcfb_a487,
    0x3fed_fc97_337b_9b5f,
    0x3fee_a4af_a2a4_90da,
    0x3fef_5076_5b6e_4540,
];
67
68#[cold]
69fn coshf_accurate(z: f64, ia: f64, sp: u64, sm: u64) -> f32 {
70 const CH: [u64; 7] = [
71 0x3ff0000000000000,
72 0x3f962e42fefa39ef,
73 0x3f2ebfbdff82c58f,
74 0x3ebc6b08d702e0ed,
75 0x3e43b2ab6fb92e5e,
76 0x3dc5d886e6d54203,
77 0x3d4430976b8ce6ef,
78 ];
79
80 const ILN2H: f64 = f64::from_bits(0x4047154765000000);
81 const ILN2L: f64 = f64::from_bits(0x3e55c17f0bbbe880);
82 let h = f_fmla(ILN2L, z, f_fmla(ILN2H, z, -ia));
83 let h2 = h * h;
84
85 let q0 = f_fmla(h2, f64::from_bits(CH[6]), f64::from_bits(CH[4]));
86 let q1 = f_fmla(h2, f64::from_bits(CH[2]), f64::from_bits(CH[0]));
87
88 let te = f_fmla(h2 * h2, q0, q1);
89
90 let j0 = f_fmla(h2, f64::from_bits(CH[5]), f64::from_bits(CH[3]));
91
92 let to = f_fmla(h2, j0, f64::from_bits(CH[1]));
93
94 let z0 = f_fmla(h, to, te);
95 let z1 = f_fmla(-h, to, te);
96 f_fmla(f64::from_bits(sp), z0, f64::from_bits(sm) * z1) as f32
97}
98
99#[inline]
103pub fn f_coshf(x: f32) -> f32 {
104 const C: [u64; 4] = [
105 0x3ff0000000000000,
106 0x3f962e42fef4c4e7,
107 0x3f2ebfd1b232f475,
108 0x3ebc6b19384ecd93,
109 ];
110
111 const I_LN2: f64 = f64::from_bits(0x40471547652b82fe);
112 let t = x.to_bits();
113 let z = x as f64;
114 let ax = t.wrapping_shl(1);
115 if ax > 0x8565a9f8u32 {
116 if ax >= 0xff000000u32 {
118 if ax.wrapping_shl(8) != 0 {
119 return x + x;
120 } return f32::INFINITY; }
123 let r = black_box(2.0) * black_box(f64::from_bits(0x47efffffe0000000) as f32);
124 return r;
125 }
126 if ax < 0x7c000000u32 {
127 if ax < 0x74000000u32 {
129 if ax < 0x66000000u32 {
131 #[cfg(any(
133 all(
134 any(target_arch = "x86", target_arch = "x86_64"),
135 target_feature = "fma"
136 ),
137 target_arch = "aarch64"
138 ))]
139 {
140 use crate::common::f_fmlaf;
141 return f_fmlaf(x.abs(), f64::from_bits(0x3e60000000000000) as f32, 1.0);
142 }
143 #[cfg(not(any(
144 all(
145 any(target_arch = "x86", target_arch = "x86_64"),
146 target_feature = "fma"
147 ),
148 target_arch = "aarch64"
149 )))]
150 {
151 let dx = x as f64;
152 return f_fmla(dx.abs(), f64::from_bits(0x3e60000000000000), 1.0) as f32;
153 }
154 }
155 #[cfg(any(
156 all(
157 any(target_arch = "x86", target_arch = "x86_64"),
158 target_feature = "fma"
159 ),
160 target_arch = "aarch64"
161 ))]
162 {
163 use crate::common::f_fmlaf;
164 return f_fmlaf(0.5 * x, x, 1.0);
165 }
166 #[cfg(not(any(
167 all(
168 any(target_arch = "x86", target_arch = "x86_64"),
169 target_feature = "fma"
170 ),
171 target_arch = "aarch64"
172 )))]
173 {
174 let dx = x as f64;
175 return f_fmla(0.5 * dx, dx, 1.0) as f32;
176 }
177 }
178 const CP: [u64; 4] = [
179 0x3fdfffffffffffe3,
180 0x3fa55555555723cf,
181 0x3f56c16bee4a5986,
182 0x3efa0483fc0328f7,
183 ];
184 let z2 = z * z;
185 let z4 = z2 * z2;
186 let w0 = f_fmla(z2, f64::from_bits(CP[1]), f64::from_bits(CP[0]));
187 let w1 = f_fmla(z2, f64::from_bits(CP[3]), f64::from_bits(CP[2]));
188 return f_fmla(z2, f_fmla(z4, w1, w0), 1.0) as f32;
189 }
190 let a = I_LN2 * z;
191 let ia = a.cpu_round_ties_even();
192 let h = a - ia;
193 let h2 = h * h;
194 let ja = (ia + f64::from_bits(0x4338000000000000)).to_bits();
195 let jp: i64 = ja as i64;
196 let jm = -jp;
197 let sp = TB[(jp & 31) as usize].wrapping_add(jp.wrapping_shr(5).wrapping_shl(52) as u64);
198 let sm = TB[(jm & 31) as usize].wrapping_add(jm.wrapping_shr(5).wrapping_shl(52) as u64);
199 let te = f_fmla(h2, f64::from_bits(C[2]), f64::from_bits(C[0]));
200 let to = f_fmla(h2, f64::from_bits(C[3]), f64::from_bits(C[1]));
201 let rp = f64::from_bits(sp) * f_fmla(h, to, te);
202 let rm = f64::from_bits(sm) * f_fmla(-h, to, te);
203 let r = rp + rm;
204 let ub = r;
205 let lb = r - f64::from_bits(0x3de3edbbe4560327) * r;
206 if ub != lb {
208 return coshf_accurate(z, ia, sp, sm);
209 }
210 ub as f32
211}
212
#[cfg(test)]
mod tests {
    use super::f_coshf;

    /// Spot-checks `f_coshf` against reference values, including the
    /// symmetric pair `-0.5` / `0.5`.
    #[test]
    fn test_coshf() {
        let cases: [(f32, f32); 3] = [(-0.5, 1.127626), (0.5, 1.127626), (7., 548.317)];
        for &(input, expected) in cases.iter() {
            assert_eq!(f_coshf(input), expected);
        }
    }
}