pxfm/logs/
logf.rs

1/*
2 * // Copyright (c) Radzivon Bartoshyk 7/2025. All rights reserved.
3 * //
4 * // Redistribution and use in source and binary forms, with or without modification,
5 * // are permitted provided that the following conditions are met:
6 * //
7 * // 1.  Redistributions of source code must retain the above copyright notice, this
8 * // list of conditions and the following disclaimer.
9 * //
10 * // 2.  Redistributions in binary form must reproduce the above copyright notice,
11 * // this list of conditions and the following disclaimer in the documentation
12 * // and/or other materials provided with the distribution.
13 * //
14 * // 3.  Neither the name of the copyright holder nor the names of its
15 * // contributors may be used to endorse or promote products derived from
16 * // this software without specific prior written permission.
17 * //
18 * // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 * // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 * // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 * // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 * // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29use crate::bits::min_normal_f32;
30use crate::common::*;
31use crate::polyeval::f_polyeval3;
32use std::hint::black_box;
33
34#[repr(C, align(8))]
35pub(crate) struct LogReductionF32Aligned(pub(crate) [u32; 128]);
36
37pub(crate) static LOG_REDUCTION_F32: LogReductionF32Aligned = LogReductionF32Aligned([
38    0x3f800000, 0x3f7e0000, 0x3f7c0000, 0x3f7a0000, 0x3f780000, 0x3f760000, 0x3f740000, 0x3f720000,
39    0x3f700000, 0x3f6f0000, 0x3f6d0000, 0x3f6b0000, 0x3f6a0000, 0x3f680000, 0x3f660000, 0x3f650000,
40    0x3f630000, 0x3f620000, 0x3f600000, 0x3f5f0000, 0x3f5d0000, 0x3f5c0000, 0x3f5a0000, 0x3f590000,
41    0x3f570000, 0x3f560000, 0x3f540000, 0x3f530000, 0x3f520000, 0x3f500000, 0x3f4f0000, 0x3f4e0000,
42    0x3f4c0000, 0x3f4b0000, 0x3f4a0000, 0x3f490000, 0x3f480000, 0x3f460000, 0x3f450000, 0x3f440000,
43    0x3f430000, 0x3f420000, 0x3f400000, 0x3f3f0000, 0x3f3e0000, 0x3f3d0000, 0x3f3c0000, 0x3f3b0000,
44    0x3f3a0000, 0x3f390000, 0x3f380000, 0x3f370000, 0x3f360000, 0x3f350000, 0x3f340000, 0x3f330000,
45    0x3f320000, 0x3f310000, 0x3f300000, 0x3f2f0000, 0x3f2e0000, 0x3f2d0000, 0x3f2c0000, 0x3f2b0000,
46    0x3f2a0000, 0x3f2a0000, 0x3f290000, 0x3f280000, 0x3f270000, 0x3f260000, 0x3f250000, 0x3f250000,
47    0x3f240000, 0x3f230000, 0x3f220000, 0x3f210000, 0x3f200000, 0x3f200000, 0x3f1f0000, 0x3f1e0000,
48    0x3f1d0000, 0x3f1d0000, 0x3f1c0000, 0x3f1b0000, 0x3f1a0000, 0x3f1a0000, 0x3f190000, 0x3f180000,
49    0x3f180000, 0x3f170000, 0x3f160000, 0x3f160000, 0x3f150000, 0x3f140000, 0x3f140000, 0x3f130000,
50    0x3f120000, 0x3f120000, 0x3f110000, 0x3f100000, 0x3f100000, 0x3f0f0000, 0x3f0e0000, 0x3f0e0000,
51    0x3f0d0000, 0x3f0d0000, 0x3f0c0000, 0x3f0b0000, 0x3f0b0000, 0x3f0a0000, 0x3f0a0000, 0x3f090000,
52    0x3f080000, 0x3f080000, 0x3f070000, 0x3f070000, 0x3f060000, 0x3f060000, 0x3f050000, 0x3f050000,
53    0x3f040000, 0x3f040000, 0x3f030000, 0x3f030000, 0x3f020000, 0x3f020000, 0x3f010000, 0x3f000000,
54]);
55
56static LOG_R: [u64; 128] = [
57    0x0000000000000000,
58    0x3f8010157588de71,
59    0x3f90205658935847,
60    0x3f98492528c8cabf,
61    0x3fa0415d89e74444,
62    0x3fa466aed42de3ea,
63    0x3fa894aa149fb343,
64    0x3faccb73cdddb2cc,
65    0x3fb08598b59e3a07,
66    0x3fb1973bd1465567,
67    0x3fb3bdf5a7d1ee64,
68    0x3fb5e95a4d9791cb,
69    0x3fb700d30aeac0e1,
70    0x3fb9335e5d594989,
71    0x3fbb6ac88dad5b1c,
72    0x3fbc885801bc4b23,
73    0x3fbec739830a1120,
74    0x3fbfe89139dbd566,
75    0x3fc1178e8227e47c,
76    0x3fc1aa2b7e23f72a,
77    0x3fc2d1610c86813a,
78    0x3fc365fcb0159016,
79    0x3fc4913d8333b561,
80    0x3fc527e5e4a1b58d,
81    0x3fc6574ebe8c133a,
82    0x3fc6f0128b756abc,
83    0x3fc823c16551a3c2,
84    0x3fc8beafeb38fe8c,
85    0x3fc95a5adcf7017f,
86    0x3fca93ed3c8ad9e3,
87    0x3fcb31d8575bce3d,
88    0x3fcbd087383bd8ad,
89    0x3fcd1037f2655e7b,
90    0x3fcdb13db0d48940,
91    0x3fce530effe71012,
92    0x3fcef5ade4dcffe6,
93    0x3fcf991c6cb3b379,
94    0x3fd07138604d5862,
95    0x3fd0c42d676162e3,
96    0x3fd1178e8227e47c,
97    0x3fd16b5ccbacfb73,
98    0x3fd1bf99635a6b95,
99    0x3fd269621134db92,
100    0x3fd2bef07cdc9354,
101    0x3fd314f1e1d35ce4,
102    0x3fd36b6776be1117,
103    0x3fd3c25277333184,
104    0x3fd419b423d5e8c7,
105    0x3fd4718dc271c41b,
106    0x3fd4c9e09e172c3c,
107    0x3fd522ae0738a3d8,
108    0x3fd57bf753c8d1fb,
109    0x3fd5d5bddf595f30,
110    0x3fd630030b3aac49,
111    0x3fd68ac83e9c6a14,
112    0x3fd6e60ee6af1972,
113    0x3fd741d876c67bb1,
114    0x3fd79e26687cfb3e,
115    0x3fd7fafa3bd8151c,
116    0x3fd85855776dcbfb,
117    0x3fd8b639a88b2df5,
118    0x3fd914a8635bf68a,
119    0x3fd973a3431356ae,
120    0x3fd9d32bea15ed3b,
121    0x3fda33440224fa79,
122    0x3fda33440224fa79,
123    0x3fda93ed3c8ad9e3,
124    0x3fdaf5295248cdd0,
125    0x3fdb56fa04462909,
126    0x3fdbb9611b80e2fb,
127    0x3fdc1c60693fa39e,
128    0x3fdc1c60693fa39e,
129    0x3fdc7ff9c74554c9,
130    0x3fdce42f18064743,
131    0x3fdd490246defa6b,
132    0x3fddae75484c9616,
133    0x3fde148a1a2726ce,
134    0x3fde148a1a2726ce,
135    0x3fde7b42c3ddad73,
136    0x3fdee2a156b413e5,
137    0x3fdf4aa7ee03192d,
138    0x3fdf4aa7ee03192d,
139    0x3fdfb358af7a4884,
140    0x3fe00e5ae5b207ab,
141    0x3fe04360be7603ad,
142    0x3fe04360be7603ad,
143    0x3fe078bf0533c568,
144    0x3fe0ae76e2d054fa,
145    0x3fe0ae76e2d054fa,
146    0x3fe0e4898611cce1,
147    0x3fe11af823c75aa8,
148    0x3fe11af823c75aa8,
149    0x3fe151c3f6f29612,
150    0x3fe188ee40f23ca6,
151    0x3fe188ee40f23ca6,
152    0x3fe1c07849ae6007,
153    0x3fe1f8635fc61659,
154    0x3fe1f8635fc61659,
155    0x3fe230b0d8bebc98,
156    0x3fe269621134db92,
157    0x3fe269621134db92,
158    0x3fe2a2786d0ec107,
159    0x3fe2dbf557b0df43,
160    0x3fe2dbf557b0df43,
161    0x3fe315da4434068b,
162    0x3fe315da4434068b,
163    0x3fe35028ad9d8c86,
164    0x3fe38ae2171976e7,
165    0x3fe38ae2171976e7,
166    0x3fe3c6080c36bfb5,
167    0x3fe3c6080c36bfb5,
168    0x3fe4019c2125ca93,
169    0x3fe43d9ff2f923c5,
170    0x3fe43d9ff2f923c5,
171    0x3fe47a1527e8a2d3,
172    0x3fe47a1527e8a2d3,
173    0x3fe4b6fd6f970c1f,
174    0x3fe4b6fd6f970c1f,
175    0x3fe4f45a835a4e19,
176    0x3fe4f45a835a4e19,
177    0x3fe5322e26867857,
178    0x3fe5322e26867857,
179    0x3fe5707a26bb8c66,
180    0x3fe5707a26bb8c66,
181    0x3fe5af405c3649e0,
182    0x3fe5af405c3649e0,
183    0x3fe5ee82aa241920,
184    0x0000000000000000,
185];
186
187/// Natural logarithm
188///
189/// Max found ULP 0.4999988
190#[inline]
191pub fn f_logf(x: f32) -> f32 {
192    let mut x_u = x.to_bits();
193
194    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
195
196    let mut m = -(E_BIAS as i32);
197    if x_u < 0x4c5d65a5u32 {
198        if x_u == 0x3f7f4d6fu32 {
199            return black_box(f64::from_bits(0xbf6659ec80000000) as f32) + min_normal_f32(true);
200        } else if x_u == 0x41178febu32 {
201            return black_box(f64::from_bits(0x4001fcbce0000000) as f32) + min_normal_f32(true);
202        } else if x_u == 0x3f800000u32 {
203            return 0.;
204        } else if x_u == 0x1e88452du32 {
205            return black_box(f64::from_bits(0xc046d7b180000000) as f32) + min_normal_f32(true);
206        }
207        if x_u < f32::MIN_POSITIVE.to_bits() {
208            if x == 0.0 {
209                return f32::NEG_INFINITY;
210            }
211            // Normalize denormal inputs.
212            x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
213            m -= 23;
214        }
215    } else {
216        if x_u == 0x4c5d65a5u32 {
217            return black_box(f32::from_bits(0x418f034b)) + min_normal_f32(true);
218        } else if x_u == 0x65d890d3u32 {
219            return black_box(f32::from_bits(0x4254d1f9)) + min_normal_f32(true);
220        } else if x_u == 0x6f31a8ecu32 {
221            return black_box(f32::from_bits(0x42845a89)) + min_normal_f32(true);
222        } else if x_u == 0x7a17f30au32 {
223            return black_box(f32::from_bits(0x42a28a1b)) + min_normal_f32(true);
224        } else if x_u == 0x500ffb03u32 {
225            return black_box(f32::from_bits(0x41b7ee9a)) + min_normal_f32(true);
226        } else if x_u == 0x5cd69e88u32 {
227            return black_box(f32::from_bits(0x4222e0a3)) + min_normal_f32(true);
228        } else if x_u == 0x5ee8984eu32 {
229            return black_box(f32::from_bits(0x422e4a21)) + min_normal_f32(true);
230        }
231
232        if x_u > f32::MAX.to_bits() {
233            if x_u == 0x80000000u32 {
234                return f32::NEG_INFINITY;
235            }
236            if x.is_sign_negative() && !x.is_nan() {
237                return f32::NAN + x;
238            }
239            // x is +inf or nan
240            if x.is_nan() {
241                return f32::NAN;
242            }
243
244            return x;
245        }
246    }
247
248    let mant = x_u & 0x007F_FFFF;
249    // Extract 7 leading fractional bits of the mantissa
250    let index = mant.wrapping_shr(16);
251    // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are
252    // all 1's.
253    m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
254    x_u = set_exponent_f32(x_u, 0x7F);
255
256    let v;
257    let u = f32::from_bits(x_u);
258
259    #[cfg(any(
260        all(
261            any(target_arch = "x86", target_arch = "x86_64"),
262            target_feature = "fma"
263        ),
264        all(target_arch = "aarch64", target_feature = "neon")
265    ))]
266    {
267        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
268    }
269    #[cfg(not(any(
270        all(
271            any(target_arch = "x86", target_arch = "x86_64"),
272            target_feature = "fma"
273        ),
274        all(target_arch = "aarch64", target_feature = "neon")
275    )))]
276    {
277        use crate::logs::log2::LOG_RANGE_REDUCTION;
278        v = f_fmla(
279            u as f64,
280            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
281            -1.0,
282        ); // Exact
283    }
284    // Degree-5 polynomial approximation of log generated by Sollya with:
285    // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
286    const COEFFS: [u64; 4] = [
287        0xbfe000000000fe63,
288        0x3fd555556e963c16,
289        0xbfd000028dedf986,
290        0x3fc966681bfda7f7,
291    ];
292    let v2 = v * v; // Exact
293    let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
294    let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
295    let p0 = f64::from_bits(LOG_R[index as usize]) + v;
296    const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
297    let r = f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2));
298    r as f32
299}
300
301#[inline]
302pub(crate) fn fast_logf(x: f32) -> f64 {
303    let mut x_u = x.to_bits();
304    const E_BIAS: u32 = (1u32 << (8 - 1u32)) - 1u32;
305    let mut m = -(E_BIAS as i32);
306    if x_u < f32::MIN_POSITIVE.to_bits() {
307        // Normalize denormal inputs.
308        x_u = (x * f64::from_bits(0x4160000000000000) as f32).to_bits();
309        m -= 23;
310    }
311
312    let mant = x_u & 0x007F_FFFF;
313    // Extract 7 leading fractional bits of the mantissa
314    let index = mant.wrapping_shr(16);
315    // Add unbiased exponent. Add an extra 1 if the 7 leading fractional bits are
316    // all 1's.
317    m = m.wrapping_add(x_u.wrapping_add(1 << 16).wrapping_shr(23) as i32);
318    x_u = set_exponent_f32(x_u, 0x7F);
319
320    let v;
321    let u = f32::from_bits(x_u);
322
323    #[cfg(any(
324        all(
325            any(target_arch = "x86", target_arch = "x86_64"),
326            target_feature = "fma"
327        ),
328        all(target_arch = "aarch64", target_feature = "neon")
329    ))]
330    {
331        v = f_fmlaf(u, f32::from_bits(LOG_REDUCTION_F32.0[index as usize]), -1.0) as f64; // Exact.
332    }
333    #[cfg(not(any(
334        all(
335            any(target_arch = "x86", target_arch = "x86_64"),
336            target_feature = "fma"
337        ),
338        all(target_arch = "aarch64", target_feature = "neon")
339    )))]
340    {
341        use crate::logs::log2::LOG_RANGE_REDUCTION;
342        v = f_fmla(
343            u as f64,
344            f64::from_bits(LOG_RANGE_REDUCTION[index as usize]),
345            -1.0,
346        ); // Exact
347    }
348    // Degree-5 polynomial approximation of log generated by Sollya with:
349    // > P = fpminimax(log(1 + x)/x, 4, [|1, D...|], [-2^-8, 2^-7]);
350    const COEFFS: [u64; 4] = [
351        0xbfe000000000fe63,
352        0x3fd555556e963c16,
353        0xbfd000028dedf986,
354        0x3fc966681bfda7f7,
355    ];
356    let v2 = v * v; // Exact
357    let p2 = f_fmla(v, f64::from_bits(COEFFS[3]), f64::from_bits(COEFFS[2]));
358    let p1 = f_fmla(v, f64::from_bits(COEFFS[1]), f64::from_bits(COEFFS[0]));
359    let p0 = f64::from_bits(LOG_R[index as usize]) + v;
360    const LOG_2: f64 = f64::from_bits(0x3fe62e42fefa39ef);
361    f_fmla(m as f64, LOG_2, f_polyeval3(v2, p0, p1, p2))
362}
363
364/// Log for given value for const context.
365/// This is simplified version just to make a good approximation on const context.
366#[inline]
367pub const fn logf(d: f32) -> f32 {
368    let ux = d.to_bits();
369    #[allow(clippy::collapsible_if)]
370    if ux < (1 << 23) || ux >= 0x7f800000u32 {
371        if ux == 0 || ux >= 0x7f800000u32 {
372            if ux == 0x7f800000u32 {
373                return d;
374            }
375            let ax = ux.wrapping_shl(1);
376            if ax == 0u32 {
377                // -0.0
378                return f32::NEG_INFINITY;
379            }
380            if ax > 0xff000000u32 {
381                return d + d;
382            } // nan
383            return f32::NAN;
384        }
385    }
386
387    let mut ix = d.to_bits();
388    /* reduce x into [sqrt(2)/2, sqrt(2)] */
389    ix += 0x3f800000 - 0x3f3504f3;
390    let n = (ix >> 23) as i32 - 0x7f;
391    ix = (ix & 0x007fffff) + 0x3f3504f3;
392    let a = f32::from_bits(ix) as f64;
393
394    let x = (a - 1.) / (a + 1.);
395    let x2 = x * x;
396    let mut u = 0.2222220222147750310e+0;
397    u = fmla(u, x2, 0.2857142871244668543e+0);
398    u = fmla(u, x2, 0.3999999999950960318e+0);
399    u = fmla(u, x2, 0.6666666666666734090e+0);
400    u = fmla(u, x2, 2.);
401    fmla(x, u, std::f64::consts::LN_2 * (n as f64)) as f32
402}
403
404#[cfg(test)]
405mod tests {
406    use super::*;
407
408    #[test]
409    fn test_logf() {
410        assert!(
411            (logf(1f32) - 0f32).abs() < 1e-6,
412            "Invalid result {}",
413            logf(1f32)
414        );
415        assert!(
416            (logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
417            "Invalid result {}",
418            logf(5f32)
419        );
420        assert_eq!(logf(0.), f32::NEG_INFINITY);
421        assert!(logf(-1.).is_nan());
422        assert!(logf(f32::NAN).is_nan());
423        assert!(logf(f32::NEG_INFINITY).is_nan());
424        assert_eq!(logf(f32::INFINITY), f32::INFINITY);
425    }
426
427    #[test]
428    fn test_flogf() {
429        assert!(
430            (f_logf(1f32) - 0f32).abs() < 1e-6,
431            "Invalid result {}",
432            f_logf(1f32)
433        );
434        assert!(
435            (f_logf(5f32) - 1.60943791243410037460f32).abs() < 1e-6,
436            "Invalid result {}",
437            f_logf(5f32)
438        );
439        assert_eq!(f_logf(0.), f32::NEG_INFINITY);
440        assert!(f_logf(-1.).is_nan());
441        assert!(f_logf(f32::NAN).is_nan());
442        assert!(f_logf(f32::NEG_INFINITY).is_nan());
443        assert_eq!(f_logf(f32::INFINITY), f32::INFINITY);
444    }
445}