1use crate::common::{f_fmla, f_fmlaf, set_exponent_f32};
30use crate::polyeval::f_polyeval3;
31
32pub(crate) static LOG2_R: [u64; 128] = [
33 0x0000000000000000,
34 0x3f872c7ba20f7327,
35 0x3f9743ee861f3556,
36 0x3fa184b8e4c56af8,
37 0x3fa77394c9d958d5,
38 0x3fad6ebd1f1febfe,
39 0x3fb1bb32a600549d,
40 0x3fb4c560fe68af88,
41 0x3fb7d60496cfbb4c,
42 0x3fb960caf9abb7ca,
43 0x3fbc7b528b70f1c5,
44 0x3fbf9c95dc1d1165,
45 0x3fc097e38ce60649,
46 0x3fc22dadc2ab3497,
47 0x3fc3c6fb650cde51,
48 0x3fc494f863b8df35,
49 0x3fc633a8bf437ce1,
50 0x3fc7046031c79f85,
51 0x3fc8a8980abfbd32,
52 0x3fc97c1cb13c7ec1,
53 0x3fcb2602497d5346,
54 0x3fcbfc67a7fff4cc,
55 0x3fcdac22d3e441d3,
56 0x3fce857d3d361368,
57 0x3fd01d9bbcfa61d4,
58 0x3fd08bce0d95fa38,
59 0x3fd169c05363f158,
60 0x3fd1d982c9d52708,
61 0x3fd249cd2b13cd6c,
62 0x3fd32bfee370ee68,
63 0x3fd39de8e1559f6f,
64 0x3fd4106017c3eca3,
65 0x3fd4f6fbb2cec598,
66 0x3fd56b22e6b578e5,
67 0x3fd5dfdcf1eeae0e,
68 0x3fd6552b49986277,
69 0x3fd6cb0f6865c8ea,
70 0x3fd7b89f02cf2aad,
71 0x3fd8304d90c11fd3,
72 0x3fd8a8980abfbd32,
73 0x3fd921800924dd3b,
74 0x3fd99b072a96c6b2,
75 0x3fda8ff971810a5e,
76 0x3fdb0b67f4f46810,
77 0x3fdb877c57b1b070,
78 0x3fdc043859e2fdb3,
79 0x3fdc819dc2d45fe4,
80 0x3fdcffae611ad12b,
81 0x3fdd7e6c0abc3579,
82 0x3fddfdd89d586e2b,
83 0x3fde7df5fe538ab3,
84 0x3fdefec61b011f85,
85 0x3fdf804ae8d0cd02,
86 0x3fe0014332be0033,
87 0x3fe042bd4b9a7c99,
88 0x3fe08494c66b8ef0,
89 0x3fe0c6caaf0c5597,
90 0x3fe1096015dee4da,
91 0x3fe14c560fe68af9,
92 0x3fe18fadb6e2d3c2,
93 0x3fe1d368296b5255,
94 0x3fe217868b0c37e8,
95 0x3fe25c0a0463beb0,
96 0x3fe2a0f3c340705c,
97 0x3fe2e644fac04fd8,
98 0x3fe2e644fac04fd8,
99 0x3fe32bfee370ee68,
100 0x3fe37222bb70747c,
101 0x3fe3b8b1c68fa6ed,
102 0x3fe3ffad4e74f1d6,
103 0x3fe44716a2c08262,
104 0x3fe44716a2c08262,
105 0x3fe48eef19317991,
106 0x3fe4d7380dcc422d,
107 0x3fe51ff2e30214bc,
108 0x3fe5692101d9b4a6,
109 0x3fe5b2c3da19723b,
110 0x3fe5b2c3da19723b,
111 0x3fe5fcdce2727ddb,
112 0x3fe6476d98ad990a,
113 0x3fe6927781d932a8,
114 0x3fe6927781d932a8,
115 0x3fe6ddfc2a78fc63,
116 0x3fe729fd26b707c8,
117 0x3fe7767c12967a45,
118 0x3fe7767c12967a45,
119 0x3fe7c37a9227e7fb,
120 0x3fe810fa51bf65fd,
121 0x3fe810fa51bf65fd,
122 0x3fe85efd062c656d,
123 0x3fe8ad846cf369a4,
124 0x3fe8ad846cf369a4,
125 0x3fe8fc924c89ac84,
126 0x3fe94c287492c4db,
127 0x3fe94c287492c4db,
128 0x3fe99c48be2063c8,
129 0x3fe9ecf50bf43f13,
130 0x3fe9ecf50bf43f13,
131 0x3fea3e2f4ac43f60,
132 0x3fea8ff971810a5e,
133 0x3fea8ff971810a5e,
134 0x3feae255819f022d,
135 0x3feb35458761d479,
136 0x3feb35458761d479,
137 0x3feb88cb9a2ab521,
138 0x3feb88cb9a2ab521,
139 0x3febdce9dcc96187,
140 0x3fec31a27dd00b4a,
141 0x3fec31a27dd00b4a,
142 0x3fec86f7b7ea4a89,
143 0x3fec86f7b7ea4a89,
144 0x3fecdcebd2373995,
145 0x3fed338120a6dd9d,
146 0x3fed338120a6dd9d,
147 0x3fed8aba045b01c8,
148 0x3fed8aba045b01c8,
149 0x3fede298ec0bac0d,
150 0x3fede298ec0bac0d,
151 0x3fee3b20546f554a,
152 0x3fee3b20546f554a,
153 0x3fee9452c8a71028,
154 0x3fee9452c8a71028,
155 0x3feeee32e2aeccbf,
156 0x3feeee32e2aeccbf,
157 0x3fef48c34bd1e96f,
158 0x3fef48c34bd1e96f,
159 0x3fefa406bd2443df,
160 0x3ff0000000000000,
161];
162
163#[inline]
167pub fn f_log2f(x: f32) -> f32 {
168 let mut x_u = x.to_bits();
169
170 const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
171
172 let mut m = -(E_BIAS as i32);
173 if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
174 if x == 0.0 {
175 return f32::NEG_INFINITY;
176 }
177 if x_u == 0x80000000u32 {
178 return f32::NEG_INFINITY;
179 }
180 if x.is_sign_negative() && !x.is_nan() {
181 return f32::NAN + x;
182 }
183 if x.is_nan() || x.is_infinite() {
185 return x + x;
186 }
187 x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
189 m -= 23;
190 }
191
192 m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
193 let mant = x_u & 0x007F_FFFF;
194 let index = mant.wrapping_shr(16);
195
196 x_u = set_exponent_f32(x_u, 0x7F);
197
198 let v;
199 let u = f32::from_bits(x_u);
200
201 #[cfg(any(
202 all(
203 any(target_arch = "x86", target_arch = "x86_64"),
204 target_feature = "fma"
205 ),
206 all(target_arch = "aarch64", target_feature = "neon")
207 ))]
208 {
209 use crate::logs::logf::LOG_REDUCTION_F32;
210 v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; }
212 #[cfg(not(any(
213 all(
214 any(target_arch = "x86", target_arch = "x86_64"),
215 target_feature = "fma"
216 ),
217 all(target_arch = "aarch64", target_feature = "neon")
218 )))]
219 {
220 use crate::logs::LOG_RANGE_REDUCTION;
221 v = f_fmla(
222 u as f64,
223 f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
224 -1.0,
225 ); }
227 let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
231
232 const COEFFS: [u64; 5] = [
233 0x3ff71547652b8133,
234 0xbfe71547652d1e33,
235 0x3fdec70a098473de,
236 0xbfd7154c5ccdf121,
237 0x3fd2514fd90a130a,
238 ];
239 let v2 = v * v; let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor);
241 let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
242 let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
243
244 let r = f_polyeval3(v2, c0, c1, c2);
245 r as f32
246}
247
248#[inline(always)]
252#[allow(dead_code)]
253pub(crate) fn f_log2fx(x: f32) -> f64 {
254 let mut x_u = x.to_bits();
255
256 const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
257
258 let mut m = -(E_BIAS as i32);
259 if x_u == 0x3f80_0000u32 {
260 return 0.0;
261 }
262
263 if x_u < f32::MIN_POSITIVE.to_bits() || x_u > f32::MAX.to_bits() {
264 if x == 0.0 {
265 return f64::NEG_INFINITY;
266 }
267 if x_u == 0x80000000u32 {
268 return f64::NEG_INFINITY;
269 }
270 if x.is_sign_negative() && !x.is_nan() {
271 return f64::NAN + x as f64;
272 }
273 if x.is_nan() || x.is_infinite() {
275 return (x + x) as f64;
276 }
277 x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
279 m -= 23;
280 }
281
282 m = m.wrapping_add(x_u.wrapping_shr(23) as i32);
283 let mant = x_u & 0x007F_FFFF;
284 let index = mant.wrapping_shr(16);
285
286 x_u = set_exponent_f32(x_u, 0x7F);
287
288 let v;
289 let u = f32::from_bits(x_u);
290
291 #[cfg(any(
292 all(
293 any(target_arch = "x86", target_arch = "x86_64"),
294 target_feature = "fma"
295 ),
296 all(target_arch = "aarch64", target_feature = "neon")
297 ))]
298 {
299 use crate::logs::logf::LOG_REDUCTION_F32;
300 v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; }
302 #[cfg(not(any(
303 all(
304 any(target_arch = "x86", target_arch = "x86_64"),
305 target_feature = "fma"
306 ),
307 all(target_arch = "aarch64", target_feature = "neon")
308 )))]
309 {
310 use crate::logs::log2::LOG_RANGE_REDUCTION;
311 v = f_fmla(
312 u as f64,
313 f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
314 -1.0,
315 ); }
317 let extra_factor = m as f64 + f64::from_bits(LOG2_R[index as usize]);
321
322 const COEFFS: [u64; 5] = [
323 0x3ff71547652b8133,
324 0xbfe71547652d1e33,
325 0x3fdec70a098473de,
326 0xbfd7154c5ccdf121,
327 0x3fd2514fd90a130a,
328 ];
329 let v2 = v * v; let c0 = f_fmla(v, f64::from_bits(COEFFS[0]), extra_factor);
331 let c1 = f_fmla(v, f64::from_bits(COEFFS[2]), f64::from_bits(COEFFS[1]));
332 let c2 = f_fmla(v, f64::from_bits(COEFFS[4]), f64::from_bits(COEFFS[3]));
333
334 f_polyeval3(v2, c0, c1, c2)
335}
336
337#[inline(always)]
339#[allow(dead_code)]
340pub(crate) fn dirty_log2f(d: f32) -> f32 {
341 let mut ix = d.to_bits();
342 ix = ix.wrapping_add(0x3f800000 - 0x3f3504f3);
344 let n = (ix >> 23) as i32 - 0x7f;
345 ix = (ix & 0x007fffff).wrapping_add(0x3f3504f3);
346 let a = f32::from_bits(ix);
347
348 let x = (a - 1.) / (a + 1.);
349
350 let x2 = x * x;
351 let mut u = 0.4121985850084821691e+0;
352 u = f_fmlaf(u, x2, 0.5770780163490337802e+0);
353 u = f_fmlaf(u, x2, 0.9617966939259845749e+0);
354 f_fmlaf(x2 * x, u, f_fmlaf(x, 0.2885390081777926802e+1, n as f32))
355}
356
357#[cfg(test)]
358mod tests {
359 use super::*;
360
361 #[test]
362 fn test_log2f() {
363 assert!((f_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
364 assert!((f_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
365 assert_eq!(f_log2f(0.), f32::NEG_INFINITY);
366 assert_eq!(f_log2f(1.0), 0.0);
367 assert!(f_log2f(-1.).is_nan());
368 assert!(f_log2f(f32::NAN).is_nan());
369 assert!(f_log2f(f32::NEG_INFINITY).is_nan());
370 assert_eq!(f_log2f(f32::INFINITY), f32::INFINITY);
371 }
372
373 #[test]
374 fn test_dirty_log2f() {
375 assert!((dirty_log2f(0.35f32) - 0.35f32.log2()).abs() < 1e-5);
376 assert!((dirty_log2f(0.9f32) - 0.9f32.log2()).abs() < 1e-5);
377 }
378}