1use crate::bits::min_normal_f32;
30use crate::common::*;
31use crate::polyeval::f_polyeval3;
32use std::hint::black_box;
33
34#[repr(C, align(8))]
35pub(crate) struct LogReductionF32Aligned(pub(crate) [u32; 128]);
36
37pub(crate) static LOG_REDUCTION_F32: LogReductionF32Aligned = LogReductionF32Aligned([
38 0x3f800000, 0x3f7e0000, 0x3f7c0000, 0x3f7a0000, 0x3f780000, 0x3f760000, 0x3f740000, 0x3f720000,
39 0x3f700000, 0x3f6f0000, 0x3f6d0000, 0x3f6b0000, 0x3f6a0000, 0x3f680000, 0x3f660000, 0x3f650000,
40 0x3f630000, 0x3f620000, 0x3f600000, 0x3f5f0000, 0x3f5d0000, 0x3f5c0000, 0x3f5a0000, 0x3f590000,
41 0x3f570000, 0x3f560000, 0x3f540000, 0x3f530000, 0x3f520000, 0x3f500000, 0x3f4f0000, 0x3f4e0000,
42 0x3f4c0000, 0x3f4b0000, 0x3f4a0000, 0x3f490000, 0x3f480000, 0x3f460000, 0x3f450000, 0x3f440000,
43 0x3f430000, 0x3f420000, 0x3f400000, 0x3f3f0000, 0x3f3e0000, 0x3f3d0000, 0x3f3c0000, 0x3f3b0000,
44 0x3f3a0000, 0x3f390000, 0x3f380000, 0x3f370000, 0x3f360000, 0x3f350000, 0x3f340000, 0x3f330000,
45 0x3f320000, 0x3f310000, 0x3f300000, 0x3f2f0000, 0x3f2e0000, 0x3f2d0000, 0x3f2c0000, 0x3f2b0000,
46 0x3f2a0000, 0x3f2a0000, 0x3f290000, 0x3f280000, 0x3f270000, 0x3f260000, 0x3f250000, 0x3f250000,
47 0x3f240000, 0x3f230000, 0x3f220000, 0x3f210000, 0x3f200000, 0x3f200000, 0x3f1f0000, 0x3f1e0000,
48 0x3f1d0000, 0x3f1d0000, 0x3f1c0000, 0x3f1b0000, 0x3f1a0000, 0x3f1a0000, 0x3f190000, 0x3f180000,
49 0x3f180000, 0x3f170000, 0x3f160000, 0x3f160000, 0x3f150000, 0x3f140000, 0x3f140000, 0x3f130000,
50 0x3f120000, 0x3f120000, 0x3f110000, 0x3f100000, 0x3f100000, 0x3f0f0000, 0x3f0e0000, 0x3f0e0000,
51 0x3f0d0000, 0x3f0d0000, 0x3f0c0000, 0x3f0b0000, 0x3f0b0000, 0x3f0a0000, 0x3f0a0000, 0x3f090000,
52 0x3f080000, 0x3f080000, 0x3f070000, 0x3f070000, 0x3f060000, 0x3f060000, 0x3f050000, 0x3f050000,
53 0x3f040000, 0x3f040000, 0x3f030000, 0x3f030000, 0x3f020000, 0x3f020000, 0x3f010000, 0x3f000000,
54]);
55
56static LOG_R: [u64; 128] = [
57 0x0000000000000000,
58 0x3f8010157588de71,
59 0x3f90205658935847,
60 0x3f98492528c8cabf,
61 0x3fa0415d89e74444,
62 0x3fa466aed42de3ea,
63 0x3fa894aa149fb343,
64 0x3faccb73cdddb2cc,
65 0x3fb08598b59e3a07,
66 0x3fb1973bd1465567,
67 0x3fb3bdf5a7d1ee64,
68 0x3fb5e95a4d9791cb,
69 0x3fb700d30aeac0e1,
70 0x3fb9335e5d594989,
71 0x3fbb6ac88dad5b1c,
72 0x3fbc885801bc4b23,
73 0x3fbec739830a1120,
74 0x3fbfe89139dbd566,
75 0x3fc1178e8227e47c,
76 0x3fc1aa2b7e23f72a,
77 0x3fc2d1610c86813a,
78 0x3fc365fcb0159016,
79 0x3fc4913d8333b561,
80 0x3fc527e5e4a1b58d,
81 0x3fc6574ebe8c133a,
82 0x3fc6f0128b756abc,
83 0x3fc823c16551a3c2,
84 0x3fc8beafeb38fe8c,
85 0x3fc95a5adcf7017f,
86 0x3fca93ed3c8ad9e3,
87 0x3fcb31d8575bce3d,
88 0x3fcbd087383bd8ad,
89 0x3fcd1037f2655e7b,
90 0x3fcdb13db0d48940,
91 0x3fce530effe71012,
92 0x3fcef5ade4dcffe6,
93 0x3fcf991c6cb3b379,
94 0x3fd07138604d5862,
95 0x3fd0c42d676162e3,
96 0x3fd1178e8227e47c,
97 0x3fd16b5ccbacfb73,
98 0x3fd1bf99635a6b95,
99 0x3fd269621134db92,
100 0x3fd2bef07cdc9354,
101 0x3fd314f1e1d35ce4,
102 0x3fd36b6776be1117,
103 0x3fd3c25277333184,
104 0x3fd419b423d5e8c7,
105 0x3fd4718dc271c41b,
106 0x3fd4c9e09e172c3c,
107 0x3fd522ae0738a3d8,
108 0x3fd57bf753c8d1fb,
109 0x3fd5d5bddf595f30,
110 0x3fd630030b3aac49,
111 0x3fd68ac83e9c6a14,
112 0x3fd6e60ee6af1972,
113 0x3fd741d876c67bb1,
114 0x3fd79e26687cfb3e,
115 0x3fd7fafa3bd8151c,
116 0x3fd85855776dcbfb,
117 0x3fd8b639a88b2df5,
118 0x3fd914a8635bf68a,
119 0x3fd973a3431356ae,
120 0x3fd9d32bea15ed3b,
121 0x3fda33440224fa79,
122 0x3fda33440224fa79,
123 0x3fda93ed3c8ad9e3,
124 0x3fdaf5295248cdd0,
125 0x3fdb56fa04462909,
126 0x3fdbb9611b80e2fb,
127 0x3fdc1c60693fa39e,
128 0x3fdc1c60693fa39e,
129 0x3fdc7ff9c74554c9,
130 0x3fdce42f18064743,
131 0x3fdd490246defa6b,
132 0x3fddae75484c9616,
133 0x3fde148a1a2726ce,
134 0x3fde148a1a2726ce,
135 0x3fde7b42c3ddad73,
136 0x3fdee2a156b413e5,
137 0x3fdf4aa7ee03192d,
138 0x3fdf4aa7ee03192d,
139 0x3fdfb358af7a4884,
140 0x3fe00e5ae5b207ab,
141 0x3fe04360be7603ad,
142 0x3fe04360be7603ad,
143 0x3fe078bf0533c568,
144 0x3fe0ae76e2d054fa,
145 0x3fe0ae76e2d054fa,
146 0x3fe0e4898611cce1,
147 0x3fe11af823c75aa8,
148 0x3fe11af823c75aa8,
149 0x3fe151c3f6f29612,
150 0x3fe188ee40f23ca6,
151 0x3fe188ee40f23ca6,
152 0x3fe1c07849ae6007,
153 0x3fe1f8635fc61659,
154 0x3fe1f8635fc61659,
155 0x3fe230b0d8bebc98,
156 0x3fe269621134db92,
157 0x3fe269621134db92,
158 0x3fe2a2786d0ec107,
159 0x3fe2dbf557b0df43,
160 0x3fe2dbf557b0df43,
161 0x3fe315da4434068b,
162 0x3fe315da4434068b,
163 0x3fe35028ad9d8c86,
164 0x3fe38ae2171976e7,
165 0x3fe38ae2171976e7,
166 0x3fe3c6080c36bfb5,
167 0x3fe3c6080c36bfb5,
168 0x3fe4019c2125ca93,
169 0x3fe43d9ff2f923c5,
170 0x3fe43d9ff2f923c5,
171 0x3fe47a1527e8a2d3,
172 0x3fe47a1527e8a2d3,
173 0x3fe4b6fd6f970c1f,
174 0x3fe4b6fd6f970c1f,
175 0x3fe4f45a835a4e19,
176 0x3fe4f45a835a4e19,
177 0x3fe5322e26867857,
178 0x3fe5322e26867857,
179 0x3fe5707a26bb8c66,
180 0x3fe5707a26bb8c66,
181 0x3fe5af405c3649e0,
182 0x3fe5af405c3649e0,
183 0x3fe5ee82aa241920,
184 0x0000000000000000,
185];
186
187#[inline]
191pub fn f_logf(x: f32) -> f32 {
192 let mut x_u = x.to_bits();
193
194 const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
195
196 let mut m = -(E_BIAS as i32);
197 if x_u < 0x4c5d65a5u32 {
198 if x_u == 0x3f7f4d6fu32 {
199 return black_box(f64::from_bits(0xbf6659ec80000000) as f32) + min_normal_f32(true);
200 } else if x_u == 0x41178febu32 {
201 return black_box(f64::from_bits(0x4001fcbce0000000) as f32) + min_normal_f32(true);
202 } else if x_u == 0x3f800000u32 {
203 return 0.;
204 } else if x_u == 0x1e88452du32 {
205 return black_box(f64::from_bits(0xc046d7b180000000) as f32) + min_normal_f32(true);
206 }
207 if x_u < f32::MIN_POSITIVE.to_bits() {
208 if x == 0.0 {
209 return f32::NEG_INFINITY;
210 }
211 x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
213 m -= 23;
214 }
215 } else {
216 if x_u == 0x4c5d65a5u32 {
217 return black_box(f32::from_bits(0x418f034b)) + min_normal_f32(true);
218 } else if x_u == 0x65d890d3u32 {
219 return black_box(f32::from_bits(0x4254d1f9)) + min_normal_f32(true);
220 } else if x_u == 0x6f31a8ecu32 {
221 return black_box(f32::from_bits(0x42845a89)) + min_normal_f32(true);
222 } else if x_u == 0x7a17f30au32 {
223 return black_box(f32::from_bits(0x42a28a1b)) + min_normal_f32(true);
224 } else if x_u == 0x500ffb03u32 {
225 return black_box(f32::from_bits(0x41b7ee9a)) + min_normal_f32(true);
226 } else if x_u == 0x5cd69e88u32 {
227 return black_box(f32::from_bits(0x4222e0a3)) + min_normal_f32(true);
228 } else if x_u == 0x5ee8984eu32 {
229 return black_box(f32::from_bits(0x422e4a21)) + min_normal_f32(true);
230 }
231
232 if x_u > f32::MAX.to_bits() {
233 if x_u == 0x80000000u32 {
234 return f32::NEG_INFINITY;
235 }
236 if x.is_sign_negative() && !x.is_nan() {
237 return f32::NAN + x;
238 }
239 if x.is_nan() {
241 return f32::NAN;
242 }
243
244 return x;
245 }
246 }
247
248 let mant = x_u & 0x007F_FFFF;
249 let index = mant.wrapping_shr(16);
251 m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
254 x_u = set_exponent_f32(x_u, 0x7F);
255
256 let v;
257 let u = f32::from_bits(x_u);
258
259 #[cfg(any(
260 all(
261 any(target_arch = "x86", target_arch = "x86_64"),
262 target_feature = "fma"
263 ),
264 all(target_arch = "aarch64", target_feature = "neon")
265 ))]
266 {
267 v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; }
269 #[cfg(not(any(
270 all(
271 any(target_arch = "x86", target_arch = "x86_64"),
272 target_feature = "fma"
273 ),
274 all(target_arch = "aarch64", target_feature = "neon")
275 )))]
276 {
277 use crate::logs::log2::LOG_RANGE_REDUCTION;
278 v = f_fmla(
279 u as f64,
280 f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
281 -1.0,
282 ); }
284 const COEFFS: [u64; 4] = [
287 0xbfe000000000fe63,
288 0x3fd555556e963c16,
289 0xbfd000028dedf986,
290 0x3fc966681bfda7f7,
291 ];
292 let v2 = v * v; let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
294 let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
295 let p0 = f64::from_bits(LOG_R[index as usize]) + v;
296 const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
297 let r = f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2));
298 r as f32
299}
300
301#[inline]
302pub(crate) fn fast_logf(x: f32) -> f64 {
303 let mut x_u = x.to_bits();
304 const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
305 let mut m = -(E_BIAS as i32);
306 if x_u < f32::MIN_POSITIVE.to_bits() {
307 x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
309 m -= 23;
310 }
311
312 let mant = x_u & 0x007F_FFFF;
313 let index = mant.wrapping_shr(16);
315 m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
318 x_u = set_exponent_f32(x_u, 0x7F);
319
320 let v;
321 let u = f32::from_bits(x_u);
322
323 #[cfg(any(
324 all(
325 any(target_arch = "x86", target_arch = "x86_64"),
326 target_feature = "fma"
327 ),
328 all(target_arch = "aarch64", target_feature = "neon")
329 ))]
330 {
331 v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; }
333 #[cfg(not(any(
334 all(
335 any(target_arch = "x86", target_arch = "x86_64"),
336 target_feature = "fma"
337 ),
338 all(target_arch = "aarch64", target_feature = "neon")
339 )))]
340 {
341 use crate::logs::log2::LOG_RANGE_REDUCTION;
342 v = f_fmla(
343 u as f64,
344 f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
345 -1.0,
346 ); }
348 const COEFFS: [u64; 4] = [
351 0xbfe000000000fe63,
352 0x3fd555556e963c16,
353 0xbfd000028dedf986,
354 0x3fc966681bfda7f7,
355 ];
356 let v2 = v * v; let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
358 let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
359 let p0 = f64::from_bits(LOG_R[index as usize]) + v;
360 const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
361 f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2))
362}
363
364#[inline]
367pub const fn logf(d: f32) -> f32 {
368 let ux = d.to_bits();
369 #[allow(clippy::collapsible_if)]
370 if ux < (1 << 23) || ux >= 0x7f800000u32 {
371 if ux == 0 || ux >= 0x7f800000u32 {
372 if ux == 0x7f800000u32 {
373 return d;
374 }
375 let ax = ux.wrapping_shl(1);
376 if ax == 0u32 {
377 return f32::NEG_INFINITY;
379 }
380 if ax > 0xff000000u32 {
381 return d + d;
382 } return f32::NAN;
384 }
385 }
386
387 let mut ix = d.to_bits();
388 ix += 0x3f800000 - 0x3f3504f3;
390 let n = (ix >> 23) as i32 - 0x7f;
391 ix = (ix & 0x007fffff) + 0x3f3504f3;
392 let a = f32::from_bits(ix) as f64;
393
394 let x = (a - 1.) / (a + 1.);
395 let x2 = x * x;
396 let mut u = 0.2222220222147750310e+0;
397 u = fmla(u, x2, 0.2857142871244668543e+0);
398 u = fmla(u, x2, 0.3999999999950960318e+0);
399 u = fmla(u, x2, 0.6666666666666734090e+0);
400 u = fmla(u, x2, 2.);
401 fmla(x, u, std::f64::consts::LN_2 * (n as f64)) as f32
402}
403
404#[cfg(test)]
405mod tests {
406 use super::*;
407
408 #[test]
409 fn test_logf() {
410 assert!(
411 (logf(1f32) - 0f32).abs() < 1e-6,
412 "Invalid result {}",
413 logf(1f32)
414 );
415 assert!(
416 (logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
417 "Invalid result {}",
418 logf(5f32)
419 );
420 assert_eq!(logf(0.), f32::NEG_INFINITY);
421 assert!(logf(-1.).is_nan());
422 assert!(logf(f32::NAN).is_nan());
423 assert!(logf(f32::NEG_INFINITY).is_nan());
424 assert_eq!(logf(f32::INFINITY), f32::INFINITY);
425 }
426
427 #[test]
428 fn test_flogf() {
429 assert!(
430 (f_logf(1f32) - 0f32).abs() < 1e-6,
431 "Invalid result {}",
432 f_logf(1f32)
433 );
434 assert!(
435 (f_logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
436 "Invalid result {}",
437 f_logf(5f32)
438 );
439 assert_eq!(f_logf(0.), f32::NEG_INFINITY);
440 assert!(f_logf(-1.).is_nan());
441 assert!(f_logf(f32::NAN).is_nan());
442 assert!(f_logf(f32::NEG_INFINITY).is_nan());
443 assert_eq!(f_logf(f32::INFINITY), f32::INFINITY);
444 }
445}