pxfm/logs/
logf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::bits::min_normal_f32;
30use crate::common::*;
31use crate::polyeval::f_polyeval3;
32use std::hint::black_box;
33
34#[repr(C, align(8))]
35pub(crate) struct LogReductionF32Aligned(pub(crate) [u32; 128]);
36
37pub(crate) static LOG_REDUCTION_F32: LogReductionF32Aligned = LogReductionF32Aligned([
38    0x3f800000, 0x3f7e0000, 0x3f7c0000, 0x3f7a0000, 0x3f780000, 0x3f760000, 0x3f740000, 0x3f720000,
39    0x3f700000, 0x3f6f0000, 0x3f6d0000, 0x3f6b0000, 0x3f6a0000, 0x3f680000, 0x3f660000, 0x3f650000,
40    0x3f630000, 0x3f620000, 0x3f600000, 0x3f5f0000, 0x3f5d0000, 0x3f5c0000, 0x3f5a0000, 0x3f590000,
41    0x3f570000, 0x3f560000, 0x3f540000, 0x3f530000, 0x3f520000, 0x3f500000, 0x3f4f0000, 0x3f4e0000,
42    0x3f4c0000, 0x3f4b0000, 0x3f4a0000, 0x3f490000, 0x3f480000, 0x3f460000, 0x3f450000, 0x3f440000,
43    0x3f430000, 0x3f420000, 0x3f400000, 0x3f3f0000, 0x3f3e0000, 0x3f3d0000, 0x3f3c0000, 0x3f3b0000,
44    0x3f3a0000, 0x3f390000, 0x3f380000, 0x3f370000, 0x3f360000, 0x3f350000, 0x3f340000, 0x3f330000,
45    0x3f320000, 0x3f310000, 0x3f300000, 0x3f2f0000, 0x3f2e0000, 0x3f2d0000, 0x3f2c0000, 0x3f2b0000,
46    0x3f2a0000, 0x3f2a0000, 0x3f290000, 0x3f280000, 0x3f270000, 0x3f260000, 0x3f250000, 0x3f250000,
47    0x3f240000, 0x3f230000, 0x3f220000, 0x3f210000, 0x3f200000, 0x3f200000, 0x3f1f0000, 0x3f1e0000,
48    0x3f1d0000, 0x3f1d0000, 0x3f1c0000, 0x3f1b0000, 0x3f1a0000, 0x3f1a0000, 0x3f190000, 0x3f180000,
49    0x3f180000, 0x3f170000, 0x3f160000, 0x3f160000, 0x3f150000, 0x3f140000, 0x3f140000, 0x3f130000,
50    0x3f120000, 0x3f120000, 0x3f110000, 0x3f100000, 0x3f100000, 0x3f0f0000, 0x3f0e0000, 0x3f0e0000,
51    0x3f0d0000, 0x3f0d0000, 0x3f0c0000, 0x3f0b0000, 0x3f0b0000, 0x3f0a0000, 0x3f0a0000, 0x3f090000,
52    0x3f080000, 0x3f080000, 0x3f070000, 0x3f070000, 0x3f060000, 0x3f060000, 0x3f050000, 0x3f050000,
53    0x3f040000, 0x3f040000, 0x3f030000, 0x3f030000, 0x3f020000, 0x3f020000, 0x3f010000, 0x3f000000,
54]);
55
56static LOG_R: [u64; 128] = [
57    0x0000000000000000,
58    0x3f8010157588de71,
59    0x3f90205658935847,
60    0x3f98492528c8cabf,
61    0x3fa0415d89e74444,
62    0x3fa466aed42de3ea,
63    0x3fa894aa149fb343,
64    0x3faccb73cdddb2cc,
65    0x3fb08598b59e3a07,
66    0x3fb1973bd1465567,
67    0x3fb3bdf5a7d1ee64,
68    0x3fb5e95a4d9791cb,
69    0x3fb700d30aeac0e1,
70    0x3fb9335e5d594989,
71    0x3fbb6ac88dad5b1c,
72    0x3fbc885801bc4b23,
73    0x3fbec739830a1120,
74    0x3fbfe89139dbd566,
75    0x3fc1178e8227e47c,
76    0x3fc1aa2b7e23f72a,
77    0x3fc2d1610c86813a,
78    0x3fc365fcb0159016,
79    0x3fc4913d8333b561,
80    0x3fc527e5e4a1b58d,
81    0x3fc6574ebe8c133a,
82    0x3fc6f0128b756abc,
83    0x3fc823c16551a3c2,
84    0x3fc8beafeb38fe8c,
85    0x3fc95a5adcf7017f,
86    0x3fca93ed3c8ad9e3,
87    0x3fcb31d8575bce3d,
88    0x3fcbd087383bd8ad,
89    0x3fcd1037f2655e7b,
90    0x3fcdb13db0d48940,
91    0x3fce530effe71012,
92    0x3fcef5ade4dcffe6,
93    0x3fcf991c6cb3b379,
94    0x3fd07138604d5862,
95    0x3fd0c42d676162e3,
96    0x3fd1178e8227e47c,
97    0x3fd16b5ccbacfb73,
98    0x3fd1bf99635a6b95,
99    0x3fd269621134db92,
100    0x3fd2bef07cdc9354,
101    0x3fd314f1e1d35ce4,
102    0x3fd36b6776be1117,
103    0x3fd3c25277333184,
104    0x3fd419b423d5e8c7,
105    0x3fd4718dc271c41b,
106    0x3fd4c9e09e172c3c,
107    0x3fd522ae0738a3d8,
108    0x3fd57bf753c8d1fb,
109    0x3fd5d5bddf595f30,
110    0x3fd630030b3aac49,
111    0x3fd68ac83e9c6a14,
112    0x3fd6e60ee6af1972,
113    0x3fd741d876c67bb1,
114    0x3fd79e26687cfb3e,
115    0x3fd7fafa3bd8151c,
116    0x3fd85855776dcbfb,
117    0x3fd8b639a88b2df5,
118    0x3fd914a8635bf68a,
119    0x3fd973a3431356ae,
120    0x3fd9d32bea15ed3b,
121    0x3fda33440224fa79,
122    0x3fda33440224fa79,
123    0x3fda93ed3c8ad9e3,
124    0x3fdaf5295248cdd0,
125    0x3fdb56fa04462909,
126    0x3fdbb9611b80e2fb,
127    0x3fdc1c60693fa39e,
128    0x3fdc1c60693fa39e,
129    0x3fdc7ff9c74554c9,
130    0x3fdce42f18064743,
131    0x3fdd490246defa6b,
132    0x3fddae75484c9616,
133    0x3fde148a1a2726ce,
134    0x3fde148a1a2726ce,
135    0x3fde7b42c3ddad73,
136    0x3fdee2a156b413e5,
137    0x3fdf4aa7ee03192d,
138    0x3fdf4aa7ee03192d,
139    0x3fdfb358af7a4884,
140    0x3fe00e5ae5b207ab,
141    0x3fe04360be7603ad,
142    0x3fe04360be7603ad,
143    0x3fe078bf0533c568,
144    0x3fe0ae76e2d054fa,
145    0x3fe0ae76e2d054fa,
146    0x3fe0e4898611cce1,
147    0x3fe11af823c75aa8,
148    0x3fe11af823c75aa8,
149    0x3fe151c3f6f29612,
150    0x3fe188ee40f23ca6,
151    0x3fe188ee40f23ca6,
152    0x3fe1c07849ae6007,
153    0x3fe1f8635fc61659,
154    0x3fe1f8635fc61659,
155    0x3fe230b0d8bebc98,
156    0x3fe269621134db92,
157    0x3fe269621134db92,
158    0x3fe2a2786d0ec107,
159    0x3fe2dbf557b0df43,
160    0x3fe2dbf557b0df43,
161    0x3fe315da4434068b,
162    0x3fe315da4434068b,
163    0x3fe35028ad9d8c86,
164    0x3fe38ae2171976e7,
165    0x3fe38ae2171976e7,
166    0x3fe3c6080c36bfb5,
167    0x3fe3c6080c36bfb5,
168    0x3fe4019c2125ca93,
169    0x3fe43d9ff2f923c5,
170    0x3fe43d9ff2f923c5,
171    0x3fe47a1527e8a2d3,
172    0x3fe47a1527e8a2d3,
173    0x3fe4b6fd6f970c1f,
174    0x3fe4b6fd6f970c1f,
175    0x3fe4f45a835a4e19,
176    0x3fe4f45a835a4e19,
177    0x3fe5322e26867857,
178    0x3fe5322e26867857,
179    0x3fe5707a26bb8c66,
180    0x3fe5707a26bb8c66,
181    0x3fe5af405c3649e0,
182    0x3fe5af405c3649e0,
183    0x3fe5ee82aa241920,
184    0x0000000000000000,
185];
186
187/// Natural logarithm
188///
189/// Max found ULP 0.4999988
190pub fn f_logf(x: f32) -> f32 {
191    let mut x_u = x.to_bits();
192
193    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
194
195    let mut m = -(E_BIAS as i32);
196    if x_u < 0x4c5d65a5u32 {
197        if x_u == 0x3f7f4d6fu32 {
198            return black_box(f64::from_bits(0xbf6659ec80000000) as f32) + min_normal_f32(true);
199        } else if x_u == 0x41178febu32 {
200            return black_box(f64::from_bits(0x4001fcbce0000000) as f32) + min_normal_f32(true);
201        } else if x_u == 0x3f800000u32 {
202            return 0.;
203        } else if x_u == 0x1e88452du32 {
204            return black_box(f64::from_bits(0xc046d7b180000000) as f32) + min_normal_f32(true);
205        }
206        if x_u < f32::MIN_POSITIVE.to_bits() {
207            if x == 0.0 {
208                return f32::NEG_INFINITY;
209            }
210            // Normalize denormal inputs.
211            x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
212            m -= 23;
213        }
214    } else {
215        if x_u == 0x4c5d65a5u32 {
216            return black_box(f32::from_bits(0x418f034b)) + min_normal_f32(true);
217        } else if x_u == 0x65d890d3u32 {
218            return black_box(f32::from_bits(0x4254d1f9)) + min_normal_f32(true);
219        } else if x_u == 0x6f31a8ecu32 {
220            return black_box(f32::from_bits(0x42845a89)) + min_normal_f32(true);
221        } else if x_u == 0x7a17f30au32 {
222            return black_box(f32::from_bits(0x42a28a1b)) + min_normal_f32(true);
223        } else if x_u == 0x500ffb03u32 {
224            return black_box(f32::from_bits(0x41b7ee9a)) + min_normal_f32(true);
225        } else if x_u == 0x5cd69e88u32 {
226            return black_box(f32::from_bits(0x4222e0a3)) + min_normal_f32(true);
227        } else if x_u == 0x5ee8984eu32 {
228            return black_box(f32::from_bits(0x422e4a21)) + min_normal_f32(true);
229        }
230
231        if x_u > f32::MAX.to_bits() {
232            if x_u == 0x80000000u32 {
233                return f32::NEG_INFINITY;
234            }
235            if x.is_sign_negative() && !x.is_nan() {
236                return f32::NAN + x;
237            }
238            // x is +inf or nan
239            if x.is_nan() {
240                return f32::NAN;
241            }
242
243            return x;
244        }
245    }
246
247    let mant = x_u & 0x007F_FFFF;
248    // Extract 7 leading fractional bits of the mantissa
249    let index = mant.wrapping_shr(16);
250    // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are
251    // all 1's.
252    m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
253    x_u = set_exponent_f32(x_u, 0x7F);
254
255    let v;
256    let u = f32::from_bits(x_u);
257
258    #[cfg(any(
259        all(
260            any(target_arch = "x86", target_arch = "x86_64"),
261            target_feature = "fma"
262        ),
263        target_arch = "aarch64"
264    ))]
265    {
266        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
267    }
268    #[cfg(not(any(
269        all(
270            any(target_arch = "x86", target_arch = "x86_64"),
271            target_feature = "fma"
272        ),
273        target_arch = "aarch64"
274    )))]
275    {
276        use crate::logs::log2::LOG_RANGE_REDUCTION;
277        v = f_fmla(
278            u as f64,
279            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
280            -1.0,
281        ); // Exact
282    }
283    // Degree-5 polynomial approximation of log generated by Sollya with:
284    // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
285    const COEFFS: [u64; 4] = [
286        0xbfe000000000fe63,
287        0x3fd555556e963c16,
288        0xbfd000028dedf986,
289        0x3fc966681bfda7f7,
290    ];
291    let v2 = v * v; // Exact
292    let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
293    let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
294    let p0 = f64::from_bits(LOG_R[index as usize]) + v;
295    const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
296    let r = f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2));
297    r as f32
298}
299
300#[inline]
301pub(crate) fn fast_logf(x: f32) -> f64 {
302    let mut x_u = x.to_bits();
303    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
304    let mut m = -(E_BIAS as i32);
305    if x_u < f32::MIN_POSITIVE.to_bits() {
306        // Normalize denormal inputs.
307        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
308        m -= 23;
309    }
310
311    let mant = x_u & 0x007F_FFFF;
312    // Extract 7 leading fractional bits of the mantissa
313    let index = mant.wrapping_shr(16);
314    // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are
315    // all 1's.
316    m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
317    x_u = set_exponent_f32(x_u, 0x7F);
318
319    let v;
320    let u = f32::from_bits(x_u);
321
322    #[cfg(any(
323        all(
324            any(target_arch = "x86", target_arch = "x86_64"),
325            target_feature = "fma"
326        ),
327        target_arch = "aarch64"
328    ))]
329    {
330        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
331    }
332    #[cfg(not(any(
333        all(
334            any(target_arch = "x86", target_arch = "x86_64"),
335            target_feature = "fma"
336        ),
337        target_arch = "aarch64"
338    )))]
339    {
340        use crate::logs::log2::LOG_RANGE_REDUCTION;
341        v = f_fmla(
342            u as f64,
343            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
344            -1.0,
345        ); // Exact
346    }
347    // Degree-5 polynomial approximation of log generated by Sollya with:
348    // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
349    const COEFFS: [u64; 4] = [
350        0xbfe000000000fe63,
351        0x3fd555556e963c16,
352        0xbfd000028dedf986,
353        0x3fc966681bfda7f7,
354    ];
355    let v2 = v * v; // Exact
356    let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
357    let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
358    let p0 = f64::from_bits(LOG_R[index as usize]) + v;
359    const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
360    f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2))
361}
362
363/// Log for given value for const context.
364/// This is simplified version just to make a good approximation on const context.
365#[inline]
366pub const fn logf(d: f32) -> f32 {
367    let ux = d.to_bits();
368    #[allow(clippy::collapsible_if)]
369    if ux < (1 << 23) || ux >= 0x7f800000u32 {
370        if ux == 0 || ux >= 0x7f800000u32 {
371            if ux == 0x7f800000u32 {
372                return d;
373            }
374            let ax = ux.wrapping_shl(1);
375            if ax == 0u32 {
376                // -0.0
377                return f32::NEG_INFINITY;
378            }
379            if ax > 0xff000000u32 {
380                return d + d;
381            } // nan
382            return f32::NAN;
383        }
384    }
385
386    let mut ix = d.to_bits();
387    /* reduce x into [sqrt(2)/2, sqrt(2)] */
388    ix += 0x3f800000 - 0x3f3504f3;
389    let n = (ix >> 23) as i32 - 0x7f;
390    ix = (ix & 0x007fffff) + 0x3f3504f3;
391    let a = f32::from_bits(ix) as f64;
392
393    let x = (a - 1.) / (a + 1.);
394    let x2 = x * x;
395    let mut u = 0.2222220222147750310e+0;
396    u = fmla(u, x2, 0.2857142871244668543e+0);
397    u = fmla(u, x2, 0.3999999999950960318e+0);
398    u = fmla(u, x2, 0.6666666666666734090e+0);
399    u = fmla(u, x2, 2.);
400    fmla(x, u, std::f64::consts::LN_2 * (n as f64)) as f32
401}
402
403#[cfg(test)]
404mod tests {
405    use super::*;
406
407    #[test]
408    fn test_logf() {
409        assert!(
410            (logf(1f32) - 0f32).abs() < 1e-6,
411            "Invalid result {}",
412            logf(1f32)
413        );
414        assert!(
415            (logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
416            "Invalid result {}",
417            logf(5f32)
418        );
419        assert_eq!(logf(0.), f32::NEG_INFINITY);
420        assert!(logf(-1.).is_nan());
421        assert!(logf(f32::NAN).is_nan());
422        assert!(logf(f32::NEG_INFINITY).is_nan());
423        assert_eq!(logf(f32::INFINITY), f32::INFINITY);
424    }
425
426    #[test]
427    fn test_flogf() {
428        assert!(
429            (f_logf(1f32) - 0f32).abs() < 1e-6,
430            "Invalid result {}",
431            f_logf(1f32)
432        );
433        assert!(
434            (f_logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
435            "Invalid result {}",
436            f_logf(5f32)
437        );
438        assert_eq!(f_logf(0.), f32::NEG_INFINITY);
439        assert!(f_logf(-1.).is_nan());
440        assert!(f_logf(f32::NAN).is_nan());
441        assert!(f_logf(f32::NEG_INFINITY).is_nan());
442        assert_eq!(f_logf(f32::INFINITY), f32::INFINITY);
443    }
444}